@1469 Cleanup the cleanup.
Took 4 hours 9 minutes
diff --git a/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java b/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
index cece582..cb1c40c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
@@ -410,7 +410,7 @@
private synchronized void initDynamicDefaults(Directory directory) throws IOException {
if (maxThreadCount == AUTO_DETECT_MERGES_AND_THREADS) {
- boolean spins = IOUtils.spins(directory);
+ boolean spins = false;
// Let tests override this to help reproducing a failure on a machine that has a different
// core count than the one where the test originally failed:
@@ -418,6 +418,8 @@
String value = System.getProperty(DEFAULT_SPINS_PROPERTY);
if (value != null) {
spins = Boolean.parseBoolean(value);
+ } else {
+ spins = IOUtils.spins(directory);
}
} catch (Exception ignored) {
// that's fine we might hit a SecurityException etc. here just continue
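
Note on the ConcurrentMergeScheduler hunk above: the spins probe now only runs when the override property is absent, instead of always probing the directory and then possibly overwriting the result. A minimal standalone sketch of that ordering follows; detectSpins() and the "demo.spins" property are invented stand-ins, not Lucene's API.

    // Sketch: consult an override property before paying for the (possibly slow) probe.
    // detectSpins() and "demo.spins" are illustrative stand-ins, not Lucene's API.
    public class SpinsDetectionSketch {

        static boolean detectSpins() {
            // pretend this is an expensive filesystem probe
            return true;
        }

        static boolean resolveSpins() {
            boolean spins = false; // safe default if everything below fails
            try {
                String value = System.getProperty("demo.spins");
                if (value != null) {
                    spins = Boolean.parseBoolean(value);   // explicit override wins, no probe
                } else {
                    spins = detectSpins();                 // only probe when not overridden
                }
            } catch (Exception ignored) {
                // e.g. SecurityException reading the property: keep the default
            }
            return spins;
        }

        public static void main(String[] args) {
            System.out.println("spins=" + resolveSpins());
        }
    }
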
diff --git a/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java b/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
index 4eede2c..fbca201 100644
--- a/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
+++ b/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
@@ -246,27 +246,13 @@
* @param config the configuration
*/
public JettySolrRunner(String solrHome, Properties nodeProperties, JettyConfig config) {
- this(solrHome, nodeProperties, config, false);
- }
-
- /**
- * Construct a JettySolrRunner
- *
- * After construction, you must start the jetty with {@link #start()}
- *
- * @param solrHome the solrHome to use
- * @param nodeProperties the container properties
- * @param config the configuration
- * @param enableProxy enables proxy feature to disable connections
- */
- public JettySolrRunner(String solrHome, Properties nodeProperties, JettyConfig config, boolean enableProxy) {
assert ObjectReleaseTracker.track(this);
SecurityManager s = System.getSecurityManager();
ThreadGroup group = (s != null) ? s.getThreadGroup() : Thread.currentThread().getThreadGroup();
scheduler = new SolrScheduledExecutorScheduler("jetty-scheduler", null, group);
- this.enableProxy = enableProxy;
+ this.enableProxy = config.enableProxy;
this.solrHome = solrHome;
this.config = config;
this.nodeProperties = nodeProperties;
@@ -705,6 +691,7 @@
server.join();
} catch (InterruptedException e) {
SolrZkClient.checkInterrupted(e);
+ log.error("Interrupted waiting to stop", e);
throw new RuntimeException(e);
}
@@ -734,7 +721,7 @@
} catch (Exception e) {
SolrZkClient.checkInterrupted(e);
- log.error("", e);
+ log.error("Exception stopping jetty", e);
throw new RuntimeException(e);
} finally {
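
Note on the JettySolrRunner hunk above: the extra constructor overload goes away and the proxy flag is read from the JettyConfig instead. A small sketch of folding a boolean parameter into the config object; RunnerConfig and Runner are invented names for illustration.

    // Sketch: fold a boolean constructor parameter into the config object so callers
    // configure one thing. RunnerConfig/Runner are invented names, not Solr's classes.
    public class ConfigFlagSketch {

        static class RunnerConfig {
            final boolean enableProxy;
            RunnerConfig(boolean enableProxy) { this.enableProxy = enableProxy; }
        }

        static class Runner {
            private final boolean enableProxy;
            // one constructor; the flag travels with the config instead of a second overload
            Runner(RunnerConfig config) { this.enableProxy = config.enableProxy; }
            boolean proxyEnabled() { return enableProxy; }
        }

        public static void main(String[] args) {
            System.out.println(new Runner(new RunnerConfig(true)).proxyEnabled());
        }
    }
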
diff --git a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
index 942c6d7..8aaf8c9 100644
--- a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
+++ b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
@@ -196,6 +196,10 @@
} else {
+ if (state == LEADER || state == POT_LEADER) {
+ return false;
+ }
+
String toWatch = seqs.get(0);
for (String node : seqs) {
if (leaderSeqNodeName.equals(node)) {
@@ -213,6 +217,8 @@
IOUtils.closeQuietly(oldWatcher);
}
+ state = WAITING_IN_ELECTION;
+
watcher = new ElectionWatcher(context.leaderSeqPath, watchedNode, context);
Stat exists = zkClient.exists(watchedNode, watcher);
if (exists == null) {
@@ -220,7 +226,7 @@
return true;
}
- state = WAITING_IN_ELECTION;
+
if (log.isDebugEnabled()) log.debug("Watching path {} to know if I could be the leader, my node is {}", watchedNode, context.leaderSeqPath);
return false;
@@ -268,7 +274,7 @@
// TODO: get this core param out of here
- protected void runIamLeaderProcess(final ElectionContext context, boolean weAreReplacement) throws KeeperException,
+ protected synchronized void runIamLeaderProcess(final ElectionContext context, boolean weAreReplacement) throws KeeperException,
InterruptedException, IOException {
if (state == CLOSED || isClosed) {
throw new AlreadyClosedException();
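
Note on the LeaderElector hunk above: re-entry into the election is refused once the state is already LEADER or POT_LEADER, and WAITING_IN_ELECTION is recorded before the watcher is registered so a watcher that fires immediately sees a consistent state. A toy sketch of that ordering; the enum values and Watcher interface below are invented, not Solr's classes.

    // Sketch of the ordering the hunk enforces: record WAITING before registering the
    // watcher, and refuse to re-enter the election once already (potentially) leader.
    import java.util.concurrent.atomic.AtomicReference;

    public class ElectionStateSketch {

        enum State { WAITING, POT_LEADER, LEADER }

        interface Watcher { void fired(); }

        private final AtomicReference<State> state = new AtomicReference<>(State.WAITING);

        boolean checkIfIAmLeader() {
            State s = state.get();
            if (s == State.LEADER || s == State.POT_LEADER) {
                return false;                       // already past waiting, don't rejoin the queue
            }
            state.set(State.WAITING);               // publish the state BEFORE the watcher can fire
            Watcher w = () -> System.out.println("watched node changed, state=" + state.get());
            registerWatcher(w);
            return false;
        }

        private void registerWatcher(Watcher w) {
            w.fired();                              // simulate an immediate notification
        }

        public static void main(String[] args) {
            new ElectionStateSketch().checkIfIAmLeader();
        }
    }
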
diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
index 384d486..d4626c2 100644
--- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
@@ -248,7 +248,7 @@
this.zkStateWriter = new ZkStateWriter(zkController.getZkStateReader(), stats, this);
}
- public synchronized void start(String id, ElectionContext context) throws KeeperException {
+ public synchronized void start(String id, ElectionContext context, boolean weAreReplacement) throws KeeperException {
log.info("Starting Overseer");
if (getCoreContainer().isShutDown() || closeAndDone) {
if (log.isDebugEnabled()) log.debug("Already closed, exiting");
@@ -274,14 +274,14 @@
// stateManagmentExecutor = ParWork.getParExecutorService("stateManagmentExecutor",
// 1, 1, 3000, new SynchronousQueue());
taskExecutor = (ParWorkExecutor) ParWork.getParExecutorService("overseerTaskExecutor",
- 4, SysStats.PROC_COUNT * 2, 1000, new BlockingArrayQueue<>(32, 64));
+ 4, Math.max(4, SysStats.PROC_COUNT * 2), 1000, new BlockingArrayQueue<>(32, 64));
for (int i = 0; i < 4; i++) {
taskExecutor.prestartCoreThread();
}
zkWriterExecutor = (ParWorkExecutor) ParWork.getParExecutorService("overseerZkWriterExecutor",
- 4, SysStats.PROC_COUNT * 2, 1000, new BlockingArrayQueue<>(64, 128));
- for (int i = 0; i < 4; i++) {
+ 12, Math.max(12, SysStats.PROC_COUNT * 2), 1000, new BlockingArrayQueue<>(64, 128));
+ for (int i = 0; i < 12; i++) {
zkWriterExecutor.prestartCoreThread();
}
@@ -334,8 +334,8 @@
queueWatcher = new WorkQueueWatcher(getCoreContainer(), this);
collectionQueueWatcher = new CollectionWorkQueueWatcher(getCoreContainer(), id, overseerLbClient, adminPath, stats, Overseer.this);
try {
- queueWatcher.start();
- collectionQueueWatcher.start();
+ queueWatcher.start(weAreReplacement);
+ collectionQueueWatcher.start(weAreReplacement);
} catch (InterruptedException e) {
log.warn("interrupted", e);
}
@@ -727,7 +727,7 @@
this.path = path;
}
- public abstract void start() throws KeeperException, InterruptedException;
+ public abstract void start(boolean weAreReplacement) throws KeeperException, InterruptedException;
private List<String> getItems() {
try {
@@ -765,7 +765,7 @@
try {
List<String> items = getItems();
if (items.size() > 0) {
- processQueueItems(items, false);
+ processQueueItems(items, false, false);
}
} catch (AlreadyClosedException e) {
@@ -778,7 +778,7 @@
}
- protected abstract void processQueueItems(List<String> items, boolean onStart);
+ protected abstract void processQueueItems(List<String> items, boolean onStart, boolean weAreReplacement);
@Override
public void close() {
@@ -803,19 +803,19 @@
super(cc, overseer, Overseer.OVERSEER_QUEUE);
}
- public void start() throws KeeperException, InterruptedException {
+ public void start(boolean weAreReplacement) throws KeeperException, InterruptedException {
if (closed) return;
zkController.getZkClient().addWatch(path, this, AddWatchMode.PERSISTENT);
startItems = super.getItems();
log.info("Overseer found entries on start {} {}", startItems, path);
if (startItems.size() > 0) {
- processQueueItems(startItems, true);
+ processQueueItems(startItems, true, weAreReplacement);
}
}
@Override
- protected void processQueueItems(List<String> items, boolean onStart) {
+ protected void processQueueItems(List<String> items, boolean onStart, boolean weAreReplacement) {
//if (closed) return;
List<String> fullPaths = new ArrayList<>(items.size());
CountDownLatch delCountDownLatch = null;
@@ -861,29 +861,20 @@
stateUpdateMessage.getProperties().remove(StatePublisher.OPERATION);
for (Map.Entry<String,Object> stateUpdateEntry : stateUpdateMessage.getProperties().entrySet()) {
- if (OverseerAction.DOWNNODE.equals(OverseerAction.get(stateUpdateEntry.getKey()))) {
- if (onStart) {
+ OverseerAction oa = OverseerAction.get(stateUpdateEntry.getKey());
+
+ if (OverseerAction.RECOVERYNODE.equals(oa) || OverseerAction.DOWNNODE.equals(oa)) {
+ if (OverseerAction.DOWNNODE.equals(oa) && onStart && !weAreReplacement) {
continue;
}
- Overseer.this.zkStateWriter.getCS().forEach((coll, docColl) -> {
- String collId = Long.toString(docColl.getId());
- ConcurrentHashMap<String,ZkStateWriter.StateUpdate> updates = collStateUpdates.get(collId);
- if (updates == null) {
- updates = new ConcurrentHashMap<>( );
- collStateUpdates.put(collId, updates);
- }
- List<Replica> replicas = docColl.getReplicas();
- for (Replica replica : replicas) {
- if (replica.getNodeName().equals(stateUpdateEntry.getValue())) {
- if (log.isDebugEnabled()) log.debug("set down node operation {} for replica {}", op, replica);
- ZkStateWriter.StateUpdate update = new ZkStateWriter.StateUpdate();
- update.id = replica.getId();
- update.state = Replica.State.getShortState(Replica.State.DOWN);
- updates.put(update.id, update);
- }
- }
- });
- } else if (OverseerAction.RECOVERYNODE.equals(OverseerAction.get(stateUpdateEntry.getKey()))) {
+ Replica.State setState = null;
+ if (OverseerAction.DOWNNODE.equals(oa)) {
+ setState = Replica.State.DOWN;
+ } else if (OverseerAction.RECOVERYNODE.equals(oa)) {
+ setState = Replica.State.RECOVERING;
+ }
+
+ Replica.State finalSetState = setState;
Overseer.this.zkStateWriter.getCS().forEach((coll, docColl) -> {
String collId = Long.toString(docColl.getId());
ConcurrentHashMap<String,ZkStateWriter.StateUpdate> updates = collStateUpdates.get(collId);
@@ -894,10 +885,10 @@
List<Replica> replicas = docColl.getReplicas();
for (Replica replica : replicas) {
if (replica.getNodeName().equals(stateUpdateEntry.getValue())) {
- if (log.isDebugEnabled()) log.debug("set recovery node operation {} for replica {}", op, replica);
+ if (log.isDebugEnabled()) log.debug("set {} node operation {} for replica {}", finalSetState, op, replica);
ZkStateWriter.StateUpdate update = new ZkStateWriter.StateUpdate();
update.id = replica.getId();
- update.state = Replica.State.getShortState(Replica.State.RECOVERING);
+ update.state = Replica.State.getShortState(finalSetState);
updates.put(update.id, update);
}
}
@@ -906,7 +897,8 @@
for (Map.Entry<String,Object> stateUpdateEntry2 : stateUpdateMessage.getProperties().entrySet()) {
// if (log.isDebugEnabled()) log.debug("state cmd entry {} asOverseerCmd={}", entry, OverseerAction.get(stateUpdateEntry.getKey()));
- if (OverseerAction.DOWNNODE.equals(OverseerAction.get(stateUpdateEntry2.getKey())) || OverseerAction.RECOVERYNODE.equals(OverseerAction.get(stateUpdateEntry2.getKey()))) {
+ OverseerAction oa2 = OverseerAction.get(stateUpdateEntry2.getKey());
+ if (OverseerAction.RECOVERYNODE.equals(oa2) || OverseerAction.DOWNNODE.equals(oa2)) {
continue;
}
String id = stateUpdateEntry2.getKey();
@@ -1053,7 +1045,7 @@
}
@Override
- public void start() throws KeeperException, InterruptedException {
+ public void start(boolean weAreReplacement) throws KeeperException, InterruptedException {
if (closed) return;
zkController.getZkClient().addWatch(path, this, AddWatchMode.PERSISTENT);
@@ -1062,12 +1054,12 @@
log.info("Overseer found entries on start {}", startItems);
if (startItems.size() > 0) {
- processQueueItems(startItems, true);
+ processQueueItems(startItems, true, weAreReplacement);
}
}
@Override
- protected void processQueueItems(List<String> items, boolean onStart) {
+ protected void processQueueItems(List<String> items, boolean onStart, boolean weAreReplacement) {
if (closed) return;
ourLock.lock();
List<String> fullPaths = new ArrayList<>(items.size());
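
Note on the Overseer hunk above: the executor maximums are clamped with Math.max so they can never fall below the core size on machines with few processors, and a fixed number of core threads is prestarted. A runnable sketch of that sizing guard using a plain ThreadPoolExecutor; the specific numbers are illustrative.

    // Sketch of the pool-sizing guard in the hunk: clamp the maximum so it can never be
    // smaller than the core size on low-core machines (ThreadPoolExecutor rejects
    // max < core with IllegalArgumentException).
    import java.util.concurrent.LinkedBlockingQueue;
    import java.util.concurrent.ThreadPoolExecutor;
    import java.util.concurrent.TimeUnit;

    public class PoolSizingSketch {
        public static void main(String[] args) {
            int procs = Runtime.getRuntime().availableProcessors(); // may be 1 or 2 in CI containers
            int core = 4;
            int max = Math.max(core, procs * 2);                    // never below the core size

            ThreadPoolExecutor pool = new ThreadPoolExecutor(
                    core, max, 1000, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<>());
            for (int i = 0; i < core; i++) {
                pool.prestartCoreThread();                           // warm the core threads up front
            }
            System.out.println("core=" + core + " max=" + max + " started=" + pool.getPoolSize());
            pool.shutdown();
        }
    }
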
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/OverseerElectionContext.java
index a7e3391..302bfee 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerElectionContext.java
@@ -95,7 +95,7 @@
if (!overseer.getZkController().getCoreContainer().isShutDown() && !overseer.getZkController().isShutdownCalled()
&& !overseer.isDone()) {
log.info("Starting overseer after winning Overseer election {}", id);
- overseer.start(id, context);
+ overseer.start(id, context, weAreReplacement);
} else {
log.info("Will not start Overseer because we are closed");
}
diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
index 013e285..1b59355 100644
--- a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
+++ b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
@@ -28,6 +28,7 @@
import org.apache.solr.common.ParWork;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader;
@@ -185,6 +186,10 @@
if (log.isDebugEnabled()) log.debug("Stopping recovery for core=[{}]", coreName);
+ if (latch != null) {
+ latch.countDown();
+ }
+
try {
if (prevSendPreRecoveryHttpUriRequest != null) {
prevSendPreRecoveryHttpUriRequest.cancel();
@@ -199,9 +204,7 @@
finalReplicationHandler.abortFetch();
}
- if (latch != null) {
- latch.countDown();
- }
+
//ObjectReleaseTracker.release(this);
}
@@ -336,6 +339,9 @@
} catch (AlreadyClosedException e) {
log.info("AlreadyClosedException, won't do recovery", e);
return;
+ } catch (RejectedExecutionException e) {
+ log.info("RejectedExecutionException, won't do recovery", e);
+ return;
} catch (Exception e) {
ParWork.propagateInterrupt(e);
log.error("Exception during recovery", e);
@@ -626,6 +632,14 @@
while (!successfulRecovery && !isClosed() && !core.isClosing() && !core.isClosed()) {
cnt++;
try {
+
+ log.debug("Begin buffering updates. core=[{}]", coreName);
+ // recalling buffer updates will drop the old buffer tlog
+ if (ulog.getState() != UpdateLog.State.BUFFERING) {
+ ulog.bufferUpdates();
+ }
+
+
CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
LeaderElector leaderElector = zkController.getLeaderElector(coreName);
@@ -636,6 +650,22 @@
return false;
}
+ DocCollection coll = zkStateReader.getClusterState().getCollectionOrNull(collection);
+ if (coll != null) {
+ Slice slice = coll.getSlice(shard);
+ if (slice != null) {
+ Replica leaderReplica = slice.getLeader();
+ if (leaderReplica != null) {
+ if (leaderReplica.getNodeName().equals(cc.getZkController().getNodeName())) {
+ leaderElector = cc.getZkController().getLeaderElector(leaderReplica.getName());
+ if (leaderElector == null || !leaderElector.isLeader()) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, leaderReplica.getName() + " is not current valid leader");
+ }
+ }
+ }
+ }
+ }
+
leader = zkController.getZkStateReader().getLeaderRetry(core.getCoreDescriptor().getCollectionName(), core.getCoreDescriptor().getCloudDescriptor().getShardId(), Integer.getInteger("solr.getleader.looptimeout", 8000));
if (leader != null && leader.getName().equals(coreName)) {
@@ -644,10 +674,6 @@
continue;
}
- log.debug("Begin buffering updates. core=[{}]", coreName);
- // recalling buffer updates will drop the old buffer tlog
- ulog.bufferUpdates();
-
// we wait a bit so that any updates on the leader
// that started before they saw recovering state
// are sure to have finished (see SOLR-7141 for
@@ -708,6 +734,11 @@
didReplication = true;
try {
+ // recalling buffer updates will drop the old buffer tlog
+ if (ulog.getState() != UpdateLog.State.BUFFERING) {
+ ulog.bufferUpdates();
+ }
+
try {
if (prevSendPreRecoveryHttpUriRequest != null) {
prevSendPreRecoveryHttpUriRequest.cancel();
@@ -716,11 +747,9 @@
// okay
}
log.debug("Begin buffering updates. core=[{}]", coreName);
- // recalling buffer updates will drop the old buffer tlog
- ulog.bufferUpdates();
- sendPrepRecoveryCmd(leader.getBaseUrl(), leader.getName(), zkStateReader.getClusterState().
- getCollection(core.getCoreDescriptor().getCollectionName()).getSlice(cloudDesc.getShardId()), core.getCoreDescriptor());
+
+ sendPrepRecoveryCmd(leader.getBaseUrl(), leader.getName(), core.getCoreDescriptor());
IndexFetcher.IndexFetchResult result = replicate(leader);
@@ -736,8 +765,8 @@
log.info("Replication Recovery was successful.");
successfulRecovery = true;
- } catch (InterruptedException | AlreadyClosedException e) {
- log.info("Interrupted or already closed, bailing on recovery");
+ } catch (InterruptedException | AlreadyClosedException | RejectedExecutionException e) {
+ log.info("{} bailing on recovery", e.getClass().getSimpleName());
close = true;
successfulRecovery = false;
break;
@@ -772,7 +801,7 @@
publishedActive = true;
close = true;
- } catch (AlreadyClosedException e) {
+ } catch (AlreadyClosedException | RejectedExecutionException e) {
log.error("Already closed");
successfulRecovery = false;
close = true;
@@ -789,7 +818,6 @@
if (successfulRecovery) {
recoveryListener.recovered();
}
-
}
if (!successfulRecovery && !isClosed()) {
@@ -941,12 +969,28 @@
return close || cc.isShutDown();
}
- final private void sendPrepRecoveryCmd(String leaderBaseUrl, String leaderCoreName, Slice slice, CoreDescriptor coreDescriptor) {
+ final private void sendPrepRecoveryCmd(String leaderBaseUrl, String leaderCoreName, CoreDescriptor coreDescriptor) {
if (coreDescriptor.getCollectionName() == null) {
throw new IllegalStateException("Collection name cannot be null");
}
+ DocCollection coll = zkStateReader.getClusterState().getCollectionOrNull(collection);
+ if (coll != null) {
+ Slice slice = coll.getSlice(shard);
+ if (slice != null) {
+ Replica leaderReplica = slice.getLeader();
+ if (leaderReplica != null) {
+ if (leaderReplica.getNodeName().equals(cc.getZkController().getNodeName())) {
+ LeaderElector leaderElector = cc.getZkController().getLeaderElector(leaderReplica.getName());
+ if (leaderElector == null || !leaderElector.isLeader()) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, leaderCoreName + " is not current valid leader");
+ }
+ }
+ }
+ }
+ }
+
WaitForState prepCmd = new WaitForState();
prepCmd.setCoreName(coreName);
prepCmd.setLeaderName(leaderCoreName);
@@ -956,8 +1000,11 @@
log.info("Sending prep recovery command to {} for leader={} params={}", leaderBaseUrl, leaderCoreName, prepCmd.getParams());
- int conflictWaitMs = zkController.getLeaderConflictResolveWait();
- int readTimeout = conflictWaitMs + Integer.parseInt(System.getProperty("prepRecoveryReadTimeoutExtraWait", "7000"));
+ int readTimeout = Integer.parseInt(System.getProperty("prepRecoveryReadTimeoutExtraWait", "5000"));
+
+ if (isClosed()) {
+ throw new AlreadyClosedException();
+ }
try (Http2SolrClient client = new Http2SolrClient.Builder(leaderBaseUrl).withHttpClient(cc.getUpdateShardHandler().
getRecoveryOnlyClient()).idleTimeout(readTimeout).markInternalRequest().build()) {
@@ -969,10 +1016,13 @@
try {
prevSendPreRecoveryHttpUriRequest = result;
try {
- boolean success = latch.await(readTimeout, TimeUnit.MILLISECONDS);
+
+ boolean success = latch.await(readTimeout + 500, TimeUnit.MILLISECONDS);
if (!success) {
//result.cancel();
log.warn("Timeout waiting for prep recovery cmd on leader {}", leaderCoreName);
+ Thread.sleep(100);
+ throw new IllegalStateException("Timeout waiting for prep recovery cmd on leader " + leaderCoreName );
}
} catch (InterruptedException e) {
close = true;
@@ -1008,12 +1058,12 @@
@Override
public void onFailure(Throwable throwable, int code) {
- log.info("failed sending prep recovery cmd to leader");
+ log.info("failed sending prep recovery cmd to leader response code={}", code, throwable);
- if (throwable.getMessage().contains("Not the valid leader")) {
+ if (throwable != null && throwable.getMessage() != null && throwable.getMessage().contains("Not the valid leader")) {
try {
try {
- Thread.sleep(250);
+ Thread.sleep(10);
cc.getZkController().getZkStateReader().waitForState(RecoveryStrategy.this.collection, 3, TimeUnit.SECONDS, (liveNodes, collectionState) -> {
if (collectionState == null) {
return false;
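
Note on the RecoveryStrategy hunk above: bufferUpdates() is now only called when the update log is not already BUFFERING, since re-calling it drops the tlog that is already buffering. A toy sketch of that check-then-act guard; the UpdateLog class below is a stand-in, not Solr's.

    // Sketch of the buffering guard added in the hunk: check the state before
    // (re)entering buffering so an existing buffer tlog is not dropped.
    public class BufferGuardSketch {

        enum State { ACTIVE, BUFFERING }

        static class UpdateLog {
            private State state = State.ACTIVE;
            State getState() { return state; }
            void bufferUpdates() {
                System.out.println("starting a fresh buffer tlog");
                state = State.BUFFERING;
            }
        }

        static void ensureBuffering(UpdateLog ulog) {
            if (ulog.getState() != State.BUFFERING) {   // only (re)enter buffering when needed
                ulog.bufferUpdates();
            }
        }

        public static void main(String[] args) {
            UpdateLog ulog = new UpdateLog();
            ensureBuffering(ulog);  // starts buffering
            ensureBuffering(ulog);  // no-op, keeps the existing buffer tlog
        }
    }
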
diff --git a/solr/core/src/java/org/apache/solr/cloud/StatePublisher.java b/solr/core/src/java/org/apache/solr/cloud/StatePublisher.java
index 8fe7abb..b2a6ad9 100644
--- a/solr/core/src/java/org/apache/solr/cloud/StatePublisher.java
+++ b/solr/core/src/java/org/apache/solr/cloud/StatePublisher.java
@@ -17,6 +17,7 @@
package org.apache.solr.cloud;
import org.apache.solr.cloud.overseer.OverseerAction;
+import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.ParWork;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.DocCollection;
@@ -62,8 +63,9 @@
}
static final String PREFIX = "qn-";
public static final NoOpMessage TERMINATE_OP = new NoOpMessage();
+ public static final ConcurrentHashMap TERMINATE_OP_MAP = new ConcurrentHashMap();
- private final ArrayBlockingQueue<ZkNodeProps> workQueue = new ArrayBlockingQueue<>(1024, true);
+ private final ArrayBlockingQueue<ConcurrentHashMap> workQueue = new ArrayBlockingQueue<>(1024, true);
private final ZkDistributedQueue overseerJobQueue;
private volatile Worker worker;
private volatile Future<?> workerFuture;
@@ -71,6 +73,9 @@
private volatile boolean terminated;
private class Worker implements Runnable {
+ public static final int POLL_TIME_ON_PUBLISH_NODE = 1;
+ public static final int POLL_TIME = 5;
+
Worker() {
}
@@ -79,20 +84,19 @@
public void run() {
while (!terminated) {
-// if (!zkStateReader.getZkClient().isConnected()) {
-// try {
-// zkStateReader.getZkClient().getConnectionManager().waitForConnected(5000);
-// } catch (TimeoutException e) {
-// continue;
-// } catch (InterruptedException e) {
-// log.error("publisher interrupted", e);
-// }
-// continue;
-// }
-
- ZkNodeProps message = null;
- ZkNodeProps bulkMessage = new ZkNodeProps();
- bulkMessage.getProperties().put(OPERATION, "state");
+ if (!zkStateReader.getZkClient().isAlive()) {
+ try {
+ zkStateReader.getZkClient().getConnectionManager().waitForConnected(5000);
+ } catch (AlreadyClosedException e) {
+ log.warn("Hit already closed exception while waiting for zkclient to reconnect");
+ return;
+ } catch (Exception e) {
+ continue;
+ }
+ }
+ ConcurrentHashMap message = null;
+ ConcurrentHashMap bulkMessage = new ConcurrentHashMap();
+ bulkMessage.put(OPERATION, "state");
int pollTime = 250;
try {
try {
@@ -104,14 +108,15 @@
if (message != null) {
log.debug("Got state message " + message);
- if (message == TERMINATE_OP) {
+ if (message == TERMINATE_OP_MAP) {
log.debug("State publish is terminated");
terminated = true;
+ pollTime = 1;
} else {
if (bulkMessage(message, bulkMessage)) {
- pollTime = 20;
+ pollTime = POLL_TIME_ON_PUBLISH_NODE;
} else {
- pollTime = 150;
+ pollTime = POLL_TIME;
}
}
@@ -124,13 +129,14 @@
}
if (message != null) {
if (log.isDebugEnabled()) log.debug("Got state message " + message);
- if (message == TERMINATE_OP) {
+ if (message == TERMINATE_OP_MAP) {
terminated = true;
+ pollTime = 1;
} else {
if (bulkMessage(message, bulkMessage)) {
- pollTime = 10;
+ pollTime = POLL_TIME_ON_PUBLISH_NODE;
} else {
- pollTime = 25;
+ pollTime = POLL_TIME;
}
}
} else {
@@ -139,7 +145,7 @@
}
}
- if (bulkMessage.getProperties().size() > 1) {
+ if (bulkMessage.size() > 1) {
processMessage(bulkMessage);
} else {
log.debug("No messages to publish, loop");
@@ -155,31 +161,32 @@
}
}
- private boolean bulkMessage(ZkNodeProps zkNodeProps, ZkNodeProps bulkMessage) {
- if (OverseerAction.get(zkNodeProps.getStr(OPERATION)) == OverseerAction.DOWNNODE) {
- String nodeName = zkNodeProps.getStr(ZkStateReader.NODE_NAME_PROP);
+ private boolean bulkMessage(ConcurrentHashMap zkNodeProps, ConcurrentHashMap bulkMessage) {
+ if (OverseerAction.get((String) zkNodeProps.get(OPERATION)) == OverseerAction.DOWNNODE) {
+ String nodeName = (String) zkNodeProps.get(ZkStateReader.NODE_NAME_PROP);
//clearStatesForNode(bulkMessage, nodeName);
- bulkMessage.getProperties().put(OverseerAction.DOWNNODE.toLower(), nodeName);
+ bulkMessage.put(OverseerAction.DOWNNODE.toLower(), nodeName);
log.debug("bulk state publish down node, props={} result={}", zkNodeProps, bulkMessage);
-
- } else if (OverseerAction.get(zkNodeProps.getStr(OPERATION)) == OverseerAction.RECOVERYNODE) {
+ return true;
+ } else if (OverseerAction.get((String) zkNodeProps.get(OPERATION)) == OverseerAction.RECOVERYNODE) {
log.debug("bulk state publish recovery node, props={} result={}", zkNodeProps, bulkMessage);
- String nodeName = zkNodeProps.getStr(ZkStateReader.NODE_NAME_PROP);
+ String nodeName = (String) zkNodeProps.get(ZkStateReader.NODE_NAME_PROP);
// clearStatesForNode(bulkMessage, nodeName);
- bulkMessage.getProperties().put(OverseerAction.RECOVERYNODE.toLower(), nodeName);
+ bulkMessage.put(OverseerAction.RECOVERYNODE.toLower(), nodeName);
log.debug("bulk state publish recovery node, props={} result={}" , zkNodeProps, bulkMessage);
+ return true;
} else {
//String collection = zkNodeProps.getStr(ZkStateReader.COLLECTION_PROP);
- String core = zkNodeProps.getStr(ZkStateReader.CORE_NAME_PROP);
- String id = zkNodeProps.getStr("id");
- String state = zkNodeProps.getStr(ZkStateReader.STATE_PROP);
+ String core = (String) zkNodeProps.get(ZkStateReader.CORE_NAME_PROP);
+ String id = (String) zkNodeProps.get("id");
+ String state = (String) zkNodeProps.get(ZkStateReader.STATE_PROP);
String line = Replica.State.getShortState(Replica.State.valueOf(state.toUpperCase(Locale.ROOT)));
if (log.isDebugEnabled()) log.debug("bulk publish core={} id={} state={} line={}", core, id, state, line);
- bulkMessage.getProperties().put(id, line);
- if (state.equals(Replica.State.RECOVERING.toString())) {
- return true;
- }
+ bulkMessage.put(id, line);
+// if (state.equals(Replica.State.RECOVERING.toString())) {
+// return true;
+// }
}
return false;
}
@@ -207,7 +214,7 @@
}
}
- private void processMessage(ZkNodeProps message) throws KeeperException, InterruptedException {
+ private void processMessage(ConcurrentHashMap message) throws KeeperException, InterruptedException {
log.info("Send state updates to Overseer {}", message);
byte[] updates = Utils.toJSON(message);
@@ -246,11 +253,12 @@
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Nulls in published state " + stateMessage);
}
-// if ((state.equals(UpdateLog.State.ACTIVE.toString().toLowerCase(Locale.ROOT)) || state.equals("leader")) && cc.isCoreLoading(core)) {
-// cc.waitForLoadingCore(core, 10000);
-// }
-
DocCollection coll = zkStateReader.getClusterState().getCollectionOrNull(collection);
+
+ if (coll == null) {
+ zkStateReader.waitForState(collection, 5, TimeUnit.SECONDS, (liveNodes, collectionState) -> collectionState != null);
+ }
+
if (coll != null) {
Replica replica = coll.getReplica(core);
if (replica != null) {
@@ -262,7 +270,7 @@
CacheEntry lastState = stateCache.get(id);
//&& (System.currentTimeMillis() - lastState.time < 1000) &&
// TODO: needs work
-// if (state.equals(lastState.state)) {
+// if (replica != null && replica.getType() == Replica.Type.PULL && lastState != null && state.equals(lastState.state) && (System.currentTimeMillis() - lastState.time < 10000)) {
// log.info("Skipping publish state as {} for {}, because it was the last state published", state, core);
// return;
// }
@@ -321,7 +329,11 @@
}
}
- workQueue.offer(stateMessage);
+ if (stateMessage == TERMINATE_OP) {
+ workQueue.offer(TERMINATE_OP_MAP);
+ } else {
+ workQueue.offer(new ConcurrentHashMap(stateMessage.getProperties()));
+ }
} catch (Exception e) {
log.error("Exception trying to publish state message={}", stateMessage, e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
@@ -342,9 +354,8 @@
}
public void close() {
- this.terminated = true;
try {
- workerFuture.cancel(false);
+ workerFuture.get();
} catch (Exception e) {
log.error("Exception waiting for close", e);
}
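
Note on the StatePublisher hunk above: queue messages become plain ConcurrentHashMaps, a shared TERMINATE_OP_MAP sentinel tells the worker to stop, and close() now waits for the worker instead of cancelling it. A self-contained sketch of that poison-pill handshake, compared by identity; the names below are invented.

    // Sketch of the termination handshake: a shared sentinel map is offered to the
    // work queue and the consumer compares by identity (==) to know it should exit.
    import java.util.Map;
    import java.util.concurrent.ArrayBlockingQueue;
    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.TimeUnit;

    public class PoisonPillSketch {
        static final Map<String, Object> TERMINATE = new ConcurrentHashMap<>(); // sentinel, compared by ==

        public static void main(String[] args) throws Exception {
            BlockingQueue<Map<String, Object>> queue = new ArrayBlockingQueue<>(16);

            Thread worker = new Thread(() -> {
                boolean terminated = false;
                while (!terminated) {
                    try {
                        Map<String, Object> msg = queue.poll(250, TimeUnit.MILLISECONDS);
                        if (msg == null) continue;
                        if (msg == TERMINATE) {          // identity check, not equals()
                            terminated = true;
                        } else {
                            System.out.println("publish " + msg);
                        }
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        return;
                    }
                }
                System.out.println("worker done");
            });
            worker.start();

            Map<String, Object> m = new ConcurrentHashMap<>();
            m.put("id", "core1");
            m.put("state", "A");
            queue.offer(m);
            queue.offer(TERMINATE);   // ask the worker to finish
            worker.join();            // close() style: wait for the worker instead of cancelling it
        }
    }
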
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index c49425c..b748784 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -21,9 +21,7 @@
import org.apache.solr.client.solrj.cloud.LockListener;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.impl.CloudHttp2SolrClient;
-import org.apache.solr.client.solrj.impl.Http2SolrClient;
import org.apache.solr.client.solrj.impl.SolrClientCloudManager;
-import org.apache.solr.client.solrj.request.CoreAdminRequest;
import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.ParWork;
@@ -52,7 +50,6 @@
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.CloseTracker;
import org.apache.solr.common.util.IOUtils;
-import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.URLUtil;
@@ -115,6 +112,7 @@
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
+import java.util.function.Predicate;
/**
* Handle ZooKeeper interactions.
@@ -155,11 +153,6 @@
@Override
public void run() {
- try {
- publishNodeAs(getNodeName(), OverseerAction.DOWNNODE);
- } catch (Exception e) {
- log.warn("Problem publish node as DOWN", e);
- }
disconnect(true);
log.info("Continuing to Solr shutdown");
}
@@ -575,18 +568,15 @@
try (ParWork closer = new ParWork(this, true, false)) {
closer.collect("replicateFromLeaders", replicateFromLeaders);
closer.collect(leaderElectors);
+ }
-// if (publishDown) {
-// closer.collect("PublishNodeAsDown&RepFromLeaders", () -> {
-// try {
-// log.info("Publish this node as DOWN...");
-// publishNodeAs(getNodeName(), OverseerAction.DOWNNODE);
-// } catch (Exception e) {
-// ParWork.propagateInterrupt("Error publishing nodes as down. Continuing to close CoreContainer", e);
-// }
-// return "PublishDown";
-// });
-// }
+
+ if (publishDown) {
+ try {
+ publishNodeAs(getNodeName(), OverseerAction.DOWNNODE);
+ } catch (Exception e) {
+ log.warn("Problem publish node as DOWN", e);
+ }
}
}
@@ -606,19 +596,14 @@
closer.collect(cloudManager);
closer.collect(cloudSolrClient);
- closer.collect("", () -> {
- try {
- if (statePublisher != null) {
- statePublisher.submitState(StatePublisher.TERMINATE_OP);
- }
- } catch (Exception e) {
- log.error("Exception closing state publisher");
- }
- });
-
collectionToTerms.forEach((s, zkCollectionTerms) -> closer.collect(zkCollectionTerms));
} finally {
+ if (statePublisher != null) {
+ statePublisher.submitState(StatePublisher.TERMINATE_OP);
+ }
+
+ IOUtils.closeQuietly(statePublisher);
IOUtils.closeQuietly(overseerElector);
if (overseer != null) {
try {
@@ -627,6 +612,9 @@
log.warn("Exception closing Overseer", e);
}
}
+ if (zkStateReader != null) {
+ zkStateReader.disableCloseLock();
+ }
IOUtils.closeQuietly(zkStateReader);
if (closeZkClient && zkClient != null) {
@@ -1074,7 +1062,15 @@
zkStateReader = new ZkStateReader(zkClient, () -> {
if (cc != null) cc.securityNodeChanged();
});
+ zkStateReader.enableCloseLock();
zkStateReader.setNode(nodeName);
+ zkStateReader.setLeaderChecker(name -> {
+ LeaderElector elector = leaderElectors.get(name);
+ if (elector != null && elector.isLeader()) {
+ return true;
+ }
+ return false;
+ });
zkStateReader.setCollectionRemovedListener(this::removeCollectionTerms);
this.baseURL = zkStateReader.getBaseUrlForNodeName(this.nodeName);
@@ -1247,6 +1243,8 @@
return overseerElector != null && overseerElector.isLeader();
}
+ public static volatile Predicate<CoreDescriptor> testing_beforeRegisterInZk;
+
/**
* Register shard with ZooKeeper.
*
@@ -1256,6 +1254,14 @@
if (getCoreContainer().isShutDown() || isDcCalled()) {
throw new AlreadyClosedException();
}
+
+ if (testing_beforeRegisterInZk != null) {
+ boolean didTrigger = testing_beforeRegisterInZk.test(desc);
+ if (log.isDebugEnabled()) {
+ log.debug("{} pre-zk hook", (didTrigger ? "Ran" : "Skipped"));
+ }
+ }
+
MDCLoggingContext.setCoreName(desc.getName());
ZkShardTerms shardTerms = null;
// LeaderElector leaderElector = null;
@@ -1302,7 +1308,7 @@
log.info("Wait to see leader for {}, {}", collection, shardId);
String leaderName = null;
- for (int i = 0; i < 20; i++) {
+ for (int i = 0; i < 15; i++) {
if (isClosed() || isDcCalled() || cc.isShutDown()) {
throw new AlreadyClosedException();
}
@@ -1313,46 +1319,29 @@
break;
}
try {
- Replica leader = zkStateReader.getLeaderRetry(collection, shardId, Integer.getInteger("solr.getleader.looptimeout", 5000));
- leaderName = leader.getName();
+ DocCollection coll = zkStateReader.getClusterState().getCollectionOrNull(collection);
+ if (coll != null) {
+ Slice slice = coll.getSlice(shardId);
+ if (slice != null) {
+ Replica leaderReplica = slice.getLeader();
+ if (leaderReplica != null) {
+ if (leaderReplica.getNodeName().equals(getNodeName())) {
+ leaderElector = leaderElectors.get(leaderReplica.getName());
- boolean isLeader = leaderName.equals(coreName);
-
- if (isLeader) {
- if (leaderElector != null && leaderElector.isLeader()) {
- break;
- } else {
- Thread.sleep(100);
- }
- } else {
- boolean stop = true;
- CoreAdminRequest.WaitForState prepCmd = new CoreAdminRequest.WaitForState();
- prepCmd.setCoreName(leader.getName());
- prepCmd.setLeaderName(leader.getName());
- prepCmd.setCollection(collection);
- prepCmd.setShardId(shardId);
-
- int readTimeout = Integer.parseInt(System.getProperty("prepRecoveryReadTimeoutExtraWait", "7000"));
-
- try (Http2SolrClient client = new Http2SolrClient.Builder(leader.getBaseUrl()).idleTimeout(readTimeout).withHttpClient(cc.getUpdateShardHandler().getTheSharedHttpClient()).markInternalRequest().build()) {
-
- prepCmd.setBasePath(leader.getBaseUrl());
-
- try {
- NamedList<Object> result = client.request(prepCmd);
- } catch (Exception e) {
- log.info("failed checking for leader {} {}", leader.getName(), e.getMessage());
- stop = false;
+ if (leaderElector != null && leaderElector.isLeader()) {
+ leaderName = leaderReplica.getName();
+ break;
+ }
+ }
}
}
- if (stop) {
- break;
- } else {
- Thread.sleep(100);
- }
}
+
+ Replica leader = zkStateReader.getLeaderRetry(getCoreContainer().getUpdateShardHandler().getTheSharedHttpClient(),collection, shardId, Integer.getInteger("solr.getleader.looptimeout", 2000), true);
+ leaderName = leader.getName();
+ break;
- } catch (TimeoutException timeoutException) {
+ } catch (TimeoutException | InterruptedException e) {
if (isClosed() || isDcCalled() || cc.isShutDown()) {
throw new AlreadyClosedException();
}
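
Note on the ZkController hunk above: a static volatile Predicate (testing_beforeRegisterInZk) lets tests intercept the path just before a core registers in ZooKeeper. A generic sketch of that style of test hook; Registrar and the hook name below are invented.

    // Sketch of the test hook pattern: a static volatile Predicate that production
    // code invokes if set, so tests can observe or fault-inject before a step.
    import java.util.function.Predicate;

    public class TestHookSketch {

        static class Registrar {
            // null in production; a test assigns it to intercept the call
            public static volatile Predicate<String> testing_beforeRegister;

            void register(String coreName) {
                Predicate<String> hook = testing_beforeRegister;   // read once, it may be cleared concurrently
                if (hook != null) {
                    boolean triggered = hook.test(coreName);
                    System.out.println((triggered ? "Ran" : "Skipped") + " pre-register hook for " + coreName);
                }
                System.out.println("registering " + coreName);
            }
        }

        public static void main(String[] args) {
            Registrar.testing_beforeRegister = name -> name.startsWith("core");
            new Registrar().register("core_node1");
            Registrar.testing_beforeRegister = null;   // tests should clear the hook afterwards
        }
    }
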
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
index 9438f80..8f187ae 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java
@@ -156,7 +156,7 @@
final String alias = message.getStr(ALIAS, collectionName);
if (log.isDebugEnabled()) log.debug("Create collection {}", collectionName);
CountDownLatch latch = new CountDownLatch(1);
- zkStateReader.getZkClient().getSolrZooKeeper().sync(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName, (rc, path, ctx) -> {
+ zkStateReader.getZkClient().getConnectionManager().getKeeper().sync(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName, (rc, path, ctx) -> {
latch.countDown();
}, null);
latch.await(5, TimeUnit.SECONDS);
@@ -356,6 +356,8 @@
if (log.isDebugEnabled()) log.debug("CreateCollectionCmd clusterstate={}", clusterState);
CollectionCmdResponse.Response response = new CollectionCmdResponse.Response();
+ final Map<String, ShardRequest> cores = Collections.unmodifiableMap(coresToCreate);
+
List<ReplicaPosition> finalReplicaPositions = replicaPositions;
response.asyncFinalRunner = new OverseerCollectionMessageHandler.Finalize() {
@Override
@@ -402,7 +404,7 @@
if (c == null) {
return false;
}
- for (String name : coresToCreate.keySet()) {
+ for (String name : cores.keySet()) {
log.debug("look for core {} {} {} {}", name, c.getReplica(name), c.getReplica(name).getState(), c.getReplica(name).getState() != Replica.State.ACTIVE);
if (c.getReplica(name) == null || c.getReplica(name).getState() != Replica.State.ACTIVE) {
log.debug("not the right replica or state {}", c.getReplica(name));
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteCollectionCmd.java
index 72bb3bb..0eabb54 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteCollectionCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/DeleteCollectionCmd.java
@@ -107,7 +107,7 @@
log.info("Check if collection exists in zookeeper {}", collection);
CountDownLatch latch = new CountDownLatch(1);
- zkStateReader.getZkClient().getSolrZooKeeper().sync(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection, (rc, path, ctx) -> {
+ zkStateReader.getZkClient().getConnectionManager().getKeeper().sync(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection, (rc, path, ctx) -> {
latch.countDown();
}, null);
latch.await(10, TimeUnit.SECONDS);
diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java
index 6fc989b..40f0446 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java
@@ -42,6 +42,7 @@
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
public class CollectionMutator {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -159,7 +160,7 @@
}
return clusterState.copyWith(coll.getName(),
- new DocCollection(coll.getName(), coll.getSlicesMap(), m, coll.getRouter(), coll.getZNodeVersion(), coll.getStateUpdates()));
+ new DocCollection(coll.getName(), coll.getSlicesMap(), m, coll.getRouter(), coll.getZNodeVersion(), (ConcurrentHashMap) coll.getStateUpdates()));
}
public static DocCollection updateSlice(String collectionName, DocCollection collection, Slice slice) {
diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/OverseerAction.java b/solr/core/src/java/org/apache/solr/cloud/overseer/OverseerAction.java
index abcd76c..c222586 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/OverseerAction.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/OverseerAction.java
@@ -18,8 +18,6 @@
import java.util.Locale;
-import org.apache.solr.common.ParWork;
-
/**
* Enum of actions supported by the overseer only.
*
diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
index 5c98e6b..7ac9f17 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
@@ -99,9 +99,11 @@
String collectionName = docCollection.getName();
ColState collState = collLocks.compute(collectionName, (s, colState) -> {
if (colState == null) {
+ log.debug("create new collection lock for {}", collectionName);
ColState cState = new ColState();
return cState;
}
+ log.debug("use existing collection lock for {}", collectionName);
return colState;
});
collState.collLock.lock();
@@ -115,12 +117,12 @@
if (currentCollection != null) {
docCollection.setZnodeVersion(currentCollection.getZNodeVersion());
- currentCollection.getProperties().keySet().retainAll(docCollection.getProperties().keySet());
List<String> removeSlices = new ArrayList();
for (Slice slice : docCollection) {
Slice currentSlice = currentCollection.getSlice(slice.getName());
if (currentSlice != null) {
if (currentSlice.get("remove") != null || slice.getProperties().get("remove") != null) {
+ log.debug("remove slice {}", slice.getName());
removeSlices.add(slice.getName());
} else {
currentCollection.getSlicesMap().put(slice.getName(), slice.update(currentSlice));
@@ -146,7 +148,17 @@
for (String removeSlice : removeSlices) {
currentCollection.getSlicesMap().remove(removeSlice);
}
- cs.put(currentCollection.getName(), currentCollection);
+ Map properties = new HashMap(currentCollection.getProperties());
+ properties.keySet().retainAll(docCollection.getProperties().keySet());
+ Set<Map.Entry<String,Object>> entries = docCollection.getProperties().entrySet();
+ for (Map.Entry<String,Object> entry : entries) {
+ properties.putIfAbsent(entry.getKey(), entry.getValue());
+ }
+
+ DocCollection newCollection = new DocCollection(collectionName, currentCollection.getSlicesMap(), properties, currentCollection.getRouter(),
+ currentCollection.getZNodeVersion(), (ConcurrentHashMap) currentCollection.getStateUpdates());
+ log.debug("zkwriter newCollection={}", newCollection);
+ cs.put(currentCollection.getName(), newCollection);
} else {
docCollection.getProperties().remove("pullReplicas");
@@ -166,7 +178,9 @@
for (String removeSlice : removeSlices) {
docCollection.getSlicesMap().remove(removeSlice);
}
-
+ String path = ZkStateReader.getCollectionPath(collectionName);
+ // Stat stat = reader.getZkClient().exists(path, null, false, false);
+ //docCollection.setZnodeVersion(stat.getVersion());
cs.put(docCollection.getName(), docCollection);
}
@@ -196,16 +210,23 @@
//throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Collection not found by id=" + collectionId);
}
- ConcurrentHashMap updates = stateUpdates.get(collection);
- if (updates == null) {
- updates = new ConcurrentHashMap();
- stateUpdates.put(collection, updates);
- }
+ ConcurrentHashMap updates;
DocCollection docColl = cs.get(collection);
String csVersion;
if (docColl != null) {
- csVersion = Integer.toString(docColl.getZNodeVersion());
+
+ updates = stateUpdates.get(collection);
+ if (updates == null) {
+ updates = (ConcurrentHashMap) docColl.getStateUpdates();
+ if (updates == null) {
+ updates = new ConcurrentHashMap();
+ }
+ stateUpdates.put(collection, updates);
+ }
+
+ int clusterStateVersion = docColl.getZNodeVersion();
+ csVersion = Integer.toString(clusterStateVersion);
for (StateUpdate state : entry.getValue().values()) {
if (state.sliceState != null) {
Slice slice = docColl.getSlice(state.sliceName);
@@ -258,7 +279,8 @@
log.trace("add new slice leader={} {} {}", newSlice.getLeader(), newSlice, docColl);
- DocCollection newDocCollection = new DocCollection(collection, newSlices, docColl.getProperties(), docColl.getRouter(), docColl.getZNodeVersion(), docColl.getStateUpdates());
+ DocCollection newDocCollection = new DocCollection(collection, newSlices, docColl.getProperties(), docColl.getRouter(), docColl.getZNodeVersion(),
+ (ConcurrentHashMap) docColl.getStateUpdates());
cs.put(collection, newDocCollection);
docColl = newDocCollection;
updates.put(replica.getInternalId(), "l");
@@ -287,7 +309,8 @@
log.trace("add new slice leader={} {}", newSlice.getLeader(), newSlice);
- DocCollection newDocCollection = new DocCollection(collection, newSlices, docColl.getProperties(), docColl.getRouter(), docColl.getZNodeVersion(), docColl.getStateUpdates());
+ DocCollection newDocCollection = new DocCollection(collection, newSlices, docColl.getProperties(), docColl.getRouter(), docColl.getZNodeVersion(),
+ (ConcurrentHashMap) docColl.getStateUpdates());
cs.put(collection, newDocCollection);
docColl = newDocCollection;
updates.put(replica.getInternalId(), state.state);
@@ -298,6 +321,12 @@
}
}
} else {
+ updates = stateUpdates.get(collection);
+ if (updates == null) {
+ updates = new ConcurrentHashMap();
+ stateUpdates.put(collection, updates);
+ }
+
for (StateUpdate state : entry.getValue().values()) {
log.debug("Could not find existing collection name={}", collection);
String setState = Replica.State.shortStateToState(state.state).toString();
@@ -350,7 +379,7 @@
write(collection);
break;
} catch (KeeperException.BadVersionException e) {
-
+ log.warn("hit bad version trying to write state.json, trying again ...");
} catch (Exception e) {
log.error("write pending failed", e);
break;
@@ -409,7 +438,7 @@
if (log.isDebugEnabled()) log.debug("Write state.json prevVersion={} bytes={} col={}", collection.getZNodeVersion(), data.length, collection);
Integer finalVersion = collection.getZNodeVersion();
- dirtyStructure.remove(collection.getName());
+
if (reader == null) {
log.error("read not initialized in zkstatewriter");
}
@@ -422,15 +451,21 @@
stat = reader.getZkClient().setData(path, data, finalVersion, true, false);
collection.setZnodeVersion(finalVersion + 1);
-
+ dirtyStructure.remove(collection.getName());
if (log.isDebugEnabled()) log.debug("set new version {} {}", collection.getName(), stat.getVersion());
} catch (KeeperException.NoNodeException e) {
log.debug("No node found for state.json", e);
} catch (KeeperException.BadVersionException bve) {
stat = reader.getZkClient().exists(path, null, false, false);
- log.info("Tried to update state.json ({}) with bad version {} \n {}", collection, finalVersion, stat != null ? stat.getVersion() : "null");
+ log.info("Tried to update state.json for {} with bad version {} found={} \n {}", coll, finalVersion, stat != null ? stat.getVersion() : "null", collection);
+
+ // collection.setZnodeVersion(stat.getVersion());
+
+ // reader.forciblyRefreshClusterStateSlow(coll);
+ // cs.put(coll,reader.getCollectionOrNull(coll));
+ collection.setZnodeVersion(stat.getVersion());
throw bve;
}
@@ -438,6 +473,7 @@
ConcurrentHashMap updates = stateUpdates.get(collection.getName());
if (updates != null) {
+ // TODO: clearing these correctly is tricky
updates.clear();
writeStateUpdates(collection, updates);
}
@@ -463,13 +499,14 @@
log.error("Failed processing update=" + collection, e);
}
- if (badVersionException.get() != null) {
- throw badVersionException.get();
- }
-
} finally {
collState.collLock.unlock();
}
+
+ if (badVersionException.get() != null) {
+ throw badVersionException.get();
+ }
+
}
private void writeStateUpdates(DocCollection collection, ConcurrentHashMap updates) throws KeeperException, InterruptedException {
@@ -574,6 +611,7 @@
ClusterState readerState = reader.getClusterState();
if (readerState != null) {
reader.forciblyRefreshAllClusterStateSlow();
+ readerState = reader.getClusterState();
cs.putAll(readerState.copy().getCollectionsMap());
}
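
Note on the ZkStateWriter hunk above: rather than calling retainAll on the live collection's properties, a fresh map is built from the current properties, trimmed to the incoming key set, and topped up with putIfAbsent before a new DocCollection is constructed. A standalone sketch of that merge using plain maps.

    // Sketch of the property merge: copy the current properties, drop keys the
    // incoming document no longer has, then add keys only the incoming one carries.
    import java.util.HashMap;
    import java.util.Map;

    public class PropertyMergeSketch {

        static Map<String, Object> merge(Map<String, Object> current, Map<String, Object> incoming) {
            Map<String, Object> merged = new HashMap<>(current);          // never mutate the live map
            merged.keySet().retainAll(incoming.keySet());                  // drop removed keys
            for (Map.Entry<String, Object> e : incoming.entrySet()) {
                merged.putIfAbsent(e.getKey(), e.getValue());              // current values win, new keys added
            }
            return merged;
        }

        public static void main(String[] args) {
            Map<String, Object> current = new HashMap<>();
            current.put("replicationFactor", 2);
            current.put("obsolete", true);

            Map<String, Object> incoming = new HashMap<>();
            incoming.put("replicationFactor", 3);   // kept as 2: the existing value wins
            incoming.put("router", "compositeId");  // added: only present in incoming

            System.out.println(merge(current, incoming));  // {replicationFactor=2, router=compositeId}
        }
    }
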
diff --git a/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
index 4c6ced7..73259a1 100644
--- a/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
@@ -179,10 +179,10 @@
@Override
public void close() throws IOException {
if (log.isTraceEnabled()) log.trace("close() - start");
-
+ closed = true;
synchronized (this) {
- closed = true;
+
if (log.isDebugEnabled()) log.debug("Closing {} - {} directories currently being tracked", this.getClass().getSimpleName(), byDirectoryCache.size());
Collection<CacheValue> values = new HashSet<>(byDirectoryCache.values());
for (CacheValue val : values) {
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 5bab87c..26eb285 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -195,11 +195,17 @@
4, 256, 1000, new BlockingArrayQueue<>(64, 64));
public final ThreadPoolExecutor coreContainerExecutor = (ThreadPoolExecutor) ParWork.getParExecutorService("Core",
- 4, SysStats.PROC_COUNT * 2, 1000, new BlockingArrayQueue<>(64, 64));
+ 4, Math.max(4, SysStats.PROC_COUNT * 2), 1000, new BlockingArrayQueue<>(64, 64));
{
- solrCoreExecutor.prestartAllCoreThreads();
- coreContainerExecutor.prestartAllCoreThreads();
+ for (int i = 0; i < 12; i++) {
+ solrCoreExecutor.prestartCoreThread();
+ }
+
+ for (int i = 0; i < 4; i++) {
+ coreContainerExecutor.prestartCoreThread();
+ }
+
}
private final OrderedExecutor replayUpdatesExecutor;
@@ -693,12 +699,6 @@
}
if (isZooKeeperAware()) {
- try {
- getZkController().publishNodeAs(getZkController().getNodeName(), OverseerAction.RECOVERYNODE);
- } catch (Exception e) {
- log.error("Failed publishing loading core as recovering", e);
- }
-
List<CoreDescriptor> removeCds = new ArrayList<>();
for (final CoreDescriptor cd : cds) {
@@ -719,13 +719,21 @@
} catch (Exception e) {
SolrException.log(log, "Failed to delete instance dir for core:" + cd.getName() + " dir:" + cd.getInstanceDir());
}
-
+ continue;
}
}
markCoreAsLoading(cd.getName());
String collection = cd.getCollectionName();
- getZkController().getZkStateReader().registerCore(collection, cd.getName());
-
+ try {
+ getZkController().getZkStateReader().registerCore(collection, cd.getName());
+ } catch (Exception e) {
+ log.error("Failed registering core with zkstatereader", e);
+ }
+ }
+ try {
+ getZkController().publishNodeAs(getZkController().getNodeName(), OverseerAction.RECOVERYNODE);
+ } catch (Exception e) {
+ log.error("Failed publishing loading core as recovering", e);
}
for (CoreDescriptor removeCd : removeCds) {
cds.remove(removeCd);
@@ -736,6 +744,8 @@
}
}
+
+ try {
// Always add $SOLR_HOME/lib to the shared resource loader
Set<String> libDirs = new LinkedHashSet<>();
libDirs.add("lib");
@@ -766,7 +776,7 @@
containerHandlers.getApiBag().registerObject(packageStoreAPI.readAPI);
containerHandlers.getApiBag().registerObject(packageStoreAPI.writeAPI);
- try {
+
logging = LogWatcher.newRegisteredLogWatcher(cfg.getLogWatcherConfig(), loader);
@@ -853,10 +863,7 @@
});
}
- } catch (Exception e) {
- log.error("Exception in CoreContainer load", e);
- throw new SolrException(ErrorCode.SERVER_ERROR, "Exception in CoreContainer load", e);
- }
+
if (!containerHandlers.keySet().contains(CORES_HANDLER_PATH)) {
throw new IllegalStateException("No core admin path was loaded " + CORES_HANDLER_PATH);
@@ -900,10 +907,6 @@
metricManager.loadClusterReporters(cfg.getMetricsConfig().getMetricReporters(), this);
}
- List<Future<SolrCore>> coreLoadFutures = null;
-
-
- coreLoadFutures = new ArrayList<>(cds.size());
if (isZooKeeperAware()) {
cds = CoreSorter.sortCores(this, cds);
}
@@ -915,6 +918,15 @@
zkSys.getZkController().createEphemeralLiveNode();
}
+ } catch (Exception e) {
+ log.error("Exception in CoreContainer load", e);
+ for (final CoreDescriptor cd : cds) {
+ markCoreAsNotLoading(cd.getName());
+ }
+ throw new SolrException(ErrorCode.SERVER_ERROR, "Exception in CoreContainer load", e);
+ }
+ List<Future<SolrCore>> coreLoadFutures = null;
+ coreLoadFutures = new ArrayList<>(cds.size());
for (final CoreDescriptor cd : cds) {
if (log.isDebugEnabled()) log.debug("Process core descriptor {} {} {}", cd.getName(), cd.isTransient(), cd.isLoadOnStartup());
@@ -922,26 +934,6 @@
solrCores.addCoreDescriptor(cd);
}
- // MRM TODO: look at ids for this
-// if (isZooKeeperAware()) {
-// String collection = cd.getCollectionName();
-//
-// if (!zkSys.zkController.getClusterState().hasCollection(collection)) {
-// solrCores.markCoreAsNotLoading(cd);
-// try {
-// coresLocator.delete(this, cd);
-// } catch (Exception e) {
-// log.error("Exception deleting core.properties file for non existing collection", e);
-// }
-//
-// try {
-// unload(cd, cd.getName(),true, true, true);
-// } catch (Exception e) {
-// log.error("Exception unloading core for non existing collection", e);
-// }
-// continue;
-// }
-// }
if (cd.isLoadOnStartup()) {
startedLoadingCores = true;
@@ -949,14 +941,7 @@
SolrCore core = null;
MDCLoggingContext.setCoreName(cd.getName());
try {
- try {
-
- core = createFromDescriptor(cd, false);
-
- } finally {
- solrCores.markCoreAsNotLoading(cd);
- }
-
+ core = createFromDescriptor(cd, false);
} catch (AlreadyClosedException e){
log.warn("Will not finish creating and registering core={} because we are shutting down", cd.getName(), e);
} catch (Exception e){
@@ -1250,17 +1235,10 @@
throw new AlreadyClosedException("Will not register SolrCore with ZooKeeper, already closed");
}
- // if (isShutDown) {
- // core.close();
- // throw new IllegalStateException("This CoreContainer has been closed");
- // }
- SolrCore old = solrCores.putCore(cd, core);
- /*
- * set both the name of the descriptor and the name of the
- * core, since the descriptors name is used for persisting.
- */
-
core.setName(cd.getName());
+ SolrCore old = solrCores.putCore(cd, core);
+
+ markCoreAsNotLoading(cd.getName());
coreInitFailures.remove(cd.getName());
@@ -1494,6 +1472,7 @@
throw solrException;
} catch (Throwable t) {
log.error("Unable to create SolrCore", t);
+ solrCores.markCoreAsNotLoading(dcore);
SolrException e = new SolrException(ErrorCode.SERVER_ERROR, "JVM Error creating core [" + dcore.getName() + "]: " + t.getMessage(), t);
coreInitFailures.put(dcore.getName(), new CoreLoadFailure(dcore, e));
solrCores.remove(dcore.getName());
@@ -1944,6 +1923,10 @@
if (cd == null) {
cd = solrCores.getCoreDescriptor(name);
}
+ if (name == null && cd != null) {
+ name = cd.getName();
+ }
+
SolrException exception = null;
try {
@@ -1982,9 +1965,10 @@
}
}
- SolrCore core;
-
- core = solrCores.remove(name);
+ SolrCore core = null;
+ if (name != null) {
+ core = solrCores.remove(name);
+ }
if (core != null) {
if (cd == null) {
cd = core.getCoreDescriptor();
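
Note on the CoreContainer hunk above: cores marked as loading are now explicitly unmarked on the failure paths as well, so a failed create cannot leave a stale loading flag that later getCore() calls would wait on. A toy sketch of that bookkeeping; the class and method names are invented.

    // Sketch: mark a core as loading up front and make sure every exit path,
    // including failures, clears the flag again.
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;

    public class LoadingFlagSketch {

        private final Set<String> currentlyLoading = ConcurrentHashMap.newKeySet();

        void create(String name, boolean fail) {
            currentlyLoading.add(name);
            boolean registered = false;
            try {
                if (fail) {
                    throw new RuntimeException("simulated create failure");
                }
                registered = true;          // normally the core is put into the registry here
            } catch (RuntimeException e) {
                currentlyLoading.remove(name);   // clear the flag on the failure path too
                throw e;
            } finally {
                if (registered) {
                    currentlyLoading.remove(name);
                }
            }
        }

        public static void main(String[] args) {
            LoadingFlagSketch c = new LoadingFlagSketch();
            try {
                c.create("broken_core", true);
            } catch (RuntimeException expected) {
                // ignore
            }
            System.out.println("still loading: " + c.currentlyLoading);   // prints an empty set
        }
    }
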
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java
index 34f028d..0c5f6a9 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCore.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java
@@ -106,7 +106,6 @@
import org.apache.solr.update.IndexFingerprint;
import org.apache.solr.update.SolrCoreState;
import org.apache.solr.update.SolrCoreState.IndexWriterCloser;
-import org.apache.solr.update.SolrIndexWriter;
import org.apache.solr.update.UpdateHandler;
import org.apache.solr.update.VersionInfo;
import org.apache.solr.update.processor.DistributedUpdateProcessorFactory;
@@ -504,12 +503,18 @@
return name;
}
- public void setName(String v) {
+ public Future setName(String v) {
+ if (v.equals(this.name)) {
+ return null;
+ }
this.name = v;
this.logid = (v == null) ? "" : ("[" + v + "] ");
if (coreMetricManager != null) {
- coreMetricManager.afterCoreSetName();
+ return coreContainer.coreContainerExecutor.submit(() -> {
+ coreMetricManager.afterCoreSetName();
+ });
}
+ return null;
}
public String getLogId() {
@@ -736,7 +741,7 @@
core = new SolrCore(coreContainer, getName(), coreConfig, cd, getDataDir(), updateHandler, solrDelPolicy, currentCore, true);
core.start();
// we open a new IndexWriter to pick up the latest config
- core.getUpdateHandler().getSolrCoreState().newIndexWriter(core, false);
+ core.getUpdateHandler().getSolrCoreState().newIndexWriter(core, false, false);
// core.getSearcher(true, false, null, true);
success = true;
return core;
@@ -804,7 +809,7 @@
* @deprecated Use of this method can only lead to race conditions. Try
* to actually obtain a lock instead.
*/
- @Deprecated private static boolean isWriterLocked (Directory directory) throws IOException {
+ @Deprecated private static boolean isWriterLocked(Directory directory) throws IOException {
try {
directory.obtainLock(IndexWriter.WRITE_LOCK_NAME).close();
return false;
@@ -822,13 +827,12 @@
// Create the index if it doesn't exist.
if (!indexExists) {
log.debug("{}Solr index directory '{}' doesn't exist. Creating new index...", logid, indexDir);
-
- try (SolrIndexWriter writer = SolrIndexWriter
- .buildIndexWriter(this, "SolrCore.initIndex", indexDir, getDirectoryFactory(), true, getLatestSchema(), solrConfig.indexConfig, solrDelPolicy,
- codec, true)) {
- } catch (Exception e) {
- ParWork.propagateInterrupt(e);
- throw new SolrException(ErrorCode.SERVER_ERROR, e);
+ RefCounted<IndexWriter> writer = getSolrCoreState().getIndexWriter(this, true);
+ IndexWriter iw = writer.get();
+ try {
+ iw.commit(); // readers need to see the segments file
+ } finally {
+ writer.decref();
}
}
@@ -978,9 +982,12 @@
CoreDescriptor cd = Objects.requireNonNull(coreDescriptor, "coreDescriptor cannot be null");
- setName(name);
+ Future future = setName(name);
+
+ // Initialize the metrics manager
this.solrConfig = configSet.getSolrConfig();
+
IndexSchema schema = configSet.getIndexSchema();
setLatestSchema(schema);
this.resourceLoader = configSet.getSolrConfig().getResourceLoader();
@@ -991,8 +998,12 @@
restManager = initRestManager(cd);
initRestManager.done();
- // Initialize the metrics manager
+
this.coreMetricManager = initCoreMetricManager(solrConfig);
+
+ if (future != null) {
+ future.get();
+ }
solrMetricsContext = coreMetricManager.getSolrMetricsContext();
StopWatch loadReporters = StopWatch.getStopWatch(this + "-loadReporters");
@@ -1669,12 +1680,17 @@
*
* @see #isClosed()
*/
- @Override public void close () {
+ @Override
+ public synchronized void close () {
int cref = refCount.get();
+ if (cref < 0) {
+ log.warn("Already closed " + cref);
+ return;
+ }
int count = refCount.decrementAndGet();
- if (count < -1) {
+ if (count < 0) {
refCount.set(-1);
log.warn("Already closed " + count);
return;
@@ -1739,7 +1755,7 @@
int timeouts = 30;
- // MRM TODO: put this timeout in play again
+ // MRM TODO: put this timeout in play again?
TimeOut timeout = new TimeOut(timeouts, TimeUnit.SECONDS, TimeSource.NANO_TIME);
int cnt = 0;
while (!canBeClosed() || refCount.get() != -1) {
@@ -1775,19 +1791,6 @@
return;
}
try {
- if (closing) {
- this.closing = true;
- while (!isClosed) {
- synchronized (closeAndWait) {
- try {
- closeAndWait.wait(500);
- } catch (InterruptedException e) {
-
- }
- }
- }
- return;
- }
if (log.isDebugEnabled()) log.debug("CLOSING SolrCore {}", logid);
assert ObjectReleaseTracker.release(this);
@@ -2143,7 +2146,7 @@
final ExecutorUtil.MDCAwareThreadPoolExecutor searcherExecutor = (ExecutorUtil.MDCAwareThreadPoolExecutor) ExecutorUtil
.newMDCAwareSingleThreadExecutor(new SolrNamedThreadFactory("searcherExecutor", true));
- private AtomicInteger onDeckSearchers = new AtomicInteger(); // number of searchers preparing
+ private final AtomicInteger onDeckSearchers = new AtomicInteger(); // number of searchers preparing
// Lock ordering: one can acquire the openSearcherLock and then the searcherLock, but not vice-versa.
private final ReentrantLock searcherLock = new ReentrantLock(true); // the sync object for the searcher
private final Condition searchLockCondition = searcherLock.newCondition();
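Note on the setName change above: it turns a synchronous metric-registry rename into deferred work, submitting the rename to the container executor and letting the constructor join the returned Future once the metric manager exists. A minimal sketch of that submit-then-join shape, using only JDK types and hypothetical names (RenameSketch, reRegisterMetrics), not the actual SolrCore API:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

// Hypothetical stand-in for the core: the expensive follow-up to a rename is
// submitted to an executor and the Future is handed back so the caller can
// join it later, once it actually needs the result.
class RenameSketch {
  private final ExecutorService executor = Executors.newSingleThreadExecutor();
  private volatile String name;

  Future<?> setName(String v) {
    if (v.equals(this.name)) {
      return null;                       // no-op rename, nothing to wait for
    }
    this.name = v;
    return executor.submit(() -> reRegisterMetrics(v));
  }

  private void reRegisterMetrics(String newName) {
    // placeholder for the real work (re-registering gauges under the new name)
  }

  void initialize() throws Exception {
    Future<?> rename = setName("core1");
    // ... unrelated construction work proceeds while the rename runs ...
    if (rename != null) {
      rename.get();                      // join before anything depends on the new registry
    }
  }
}

Returning null for a no-op rename keeps callers from blocking on work that never ran.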
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCores.java b/solr/core/src/java/org/apache/solr/core/SolrCores.java
index ae29756..db7ecb4 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCores.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCores.java
@@ -96,8 +96,6 @@
if (log.isDebugEnabled()) log.debug("Closing SolrCores");
this.closed = true;
- currentlyLoadingCores.clear();
-
Collection<SolrCore> coreList = new ArrayList<>();
TransientSolrCoreCache transientSolrCoreCache = getTransientCacheHandler();
@@ -135,6 +133,7 @@
}
});
+ currentlyLoadingCores.clear();
}
// Returns the old core if there was a core of the same name.
@@ -284,12 +283,14 @@
}
protected SolrCore remove(String name) {
+ currentlyLoadingCores.remove(name);
+
if (name == null) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Cannot unload non-existent core [null]");
}
if (log.isDebugEnabled()) log.debug("remove core from solrcores {}", name);
- currentlyLoadingCores.remove(name);
+
SolrCore ret = cores.remove(name);
residentDesciptors.remove(name);
// It could have been a newly-created core. It could have been a transient core. The newly-created cores
@@ -387,11 +388,6 @@
return cds;
}
- // cores marked as loading will block on getCore
- public void markCoreAsLoading(CoreDescriptor cd) {
- markCoreAsLoading(cd.getName());
- }
-
public void markCoreAsLoading(String name) {
if (getAllCoreNames().contains(name)) {
log.warn("Creating a core with existing name is not allowed {}", name);
@@ -441,7 +437,7 @@
while (isCoreLoading(core)) {
synchronized (loadingSignal) {
try {
- loadingSignal.wait(1000);
+ loadingSignal.wait(500);
} catch (InterruptedException e) {
ParWork.propagateInterrupt(e);
return;
@@ -454,10 +450,7 @@
}
public boolean isCoreLoading(String name) {
- if (currentlyLoadingCores.contains(name)) {
- return true;
- }
- return false;
+ return (currentlyLoadingCores.contains(name));
}
public TransientSolrCoreCache getTransientCacheHandler() {
@@ -472,6 +465,5 @@
public void closing() {
this.closed = true;
- currentlyLoadingCores.clear();
}
}
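SolrCores keeps a currentlyLoadingCores set and parks callers on loadingSignal until a core finishes loading; the hunks above only shorten the bounded wait from 1000 ms to 500 ms and move where the set is cleared. A small sketch of that wait/notify pattern under hypothetical names (LoadingTracker, markLoading, markLoaded); the real class also bails out when the container is closing:

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

class LoadingTracker {
  private final Set<String> currentlyLoading = ConcurrentHashMap.newKeySet();
  private final Object loadingSignal = new Object();

  void markLoading(String name) {
    currentlyLoading.add(name);
  }

  void markLoaded(String name) {
    currentlyLoading.remove(name);
    synchronized (loadingSignal) {
      loadingSignal.notifyAll();         // wake any waiters to re-check
    }
  }

  void waitUntilLoaded(String name) {
    while (currentlyLoading.contains(name)) {
      synchronized (loadingSignal) {
        try {
          loadingSignal.wait(500);       // bounded wait, then re-check the set
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          return;                        // give up on interrupt, as the diff does
        }
      }
    }
  }
}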
diff --git a/solr/core/src/java/org/apache/solr/core/ZkContainer.java b/solr/core/src/java/org/apache/solr/core/ZkContainer.java
index 60be50b..01ad136 100644
--- a/solr/core/src/java/org/apache/solr/core/ZkContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/ZkContainer.java
@@ -195,36 +195,6 @@
if (zkRun == null || zkRun.trim().length() == 0 || zkRun.lastIndexOf('/') < 0) return zkRun;
return zkRun.substring(0, zkRun.lastIndexOf('/'));
}
-
- public static volatile Predicate<CoreDescriptor> testing_beforeRegisterInZk;
-
- public Future registerInZk(final SolrCore core) {
- log.info("Register in ZooKeeper core={} liveNodes={}", core.getName(), zkController.getZkStateReader().getLiveNodes());
- CoreDescriptor cd = core.getCoreDescriptor(); // save this here - the core may not have it later
- Runnable r = () -> {
- MDCLoggingContext.setCoreName(core.getName());
- try {
- try {
- if (testing_beforeRegisterInZk != null) {
- boolean didTrigger = testing_beforeRegisterInZk.test(cd);
- if (log.isDebugEnabled()) {
- log.debug("{} pre-zk hook", (didTrigger ? "Ran" : "Skipped"));
- }
- }
-
- zkController.register(core.getName(), cd);
-
- } catch (Exception e) {
- log.error("Failed trying to register with zookeeper", e);
- }
- } finally {
- MDCLoggingContext.clear();
- }
- };
- // r.run();
- return ParWork.getRootSharedExecutor().submit(r); // ### expert usage
- //return null;
- }
public ZkController getZkController() {
return zkController;
diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
index 8b388cc..007291a 100644
--- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
+++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
@@ -152,7 +152,7 @@
final ReplicationHandler replicationHandler;
- private volatile Date replicationStartTimeStamp;
+ private volatile long replicationStartTimeStamp;
private RTimer replicationTimer;
private final SolrCore solrCore;
@@ -950,7 +950,9 @@
// must get the latest solrCore object because the one we have might be closed because of a reload
// todo stop keeping solrCore around
try (SolrCore core = solrCore.getCoreContainer().getCore(solrCore.getName())) {
- @SuppressWarnings({"rawtypes"})
+// testing
+// @SuppressWarnings({"rawtypes"}) SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
+// core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
Future[] waitSearcher = new Future[1];
searcher = core.getSearcher(true, true, waitSearcher, true);
if (waitSearcher[0] != null) {
@@ -1598,21 +1600,21 @@
@SuppressForbidden(reason = "Need currentTimeMillis for debugging/stats")
private void markReplicationStart() {
replicationTimer = new RTimer();
- replicationStartTimeStamp = new Date();
+ replicationStartTimeStamp = System.nanoTime();
}
private void markReplicationStop() {
- replicationStartTimeStamp = null;
+ replicationStartTimeStamp = 0;
replicationTimer = null;
}
Date getReplicationStartTimeStamp() {
- return replicationStartTimeStamp;
+ return new Date(TimeUnit.MILLISECONDS.convert(replicationStartTimeStamp, TimeUnit.NANOSECONDS));
}
long getReplicationTimeElapsed() {
long timeElapsed = 0;
- if (replicationStartTimeStamp != null)
+ if (replicationStartTimeStamp > 0)
timeElapsed = TimeUnit.SECONDS.convert((long) replicationTimer.getTime(), TimeUnit.MILLISECONDS);
return timeElapsed;
}
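The IndexFetcher change swaps a nullable Date for a volatile long taken from System.nanoTime(), with 0 as the "no replication in progress" sentinel. nanoTime only measures elapsed time, not a wall-clock instant, so the sketch below sticks to elapsed-time reporting; names are illustrative:

import java.util.concurrent.TimeUnit;

class ReplicationClock {
  private volatile long startNanos;      // 0 == not running

  void markStart() {
    startNanos = System.nanoTime();
  }

  void markStop() {
    startNanos = 0;
  }

  long elapsedSeconds() {
    long start = startNanos;             // read the volatile once
    if (start == 0) {
      return 0;
    }
    return TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - start);
  }
}

Reading the volatile into a local before testing it avoids a race where markStop() zeroes the field between the check and the subtraction.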
diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
index 7483a6a..b103f09 100644
--- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
+++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
@@ -38,6 +38,7 @@
import org.apache.solr.core.PluginBag;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrInfoBean;
+import org.apache.solr.handler.admin.PrepRecoveryOp;
import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.metrics.SolrMetricsContext;
import org.apache.solr.request.SolrQueryRequest;
@@ -226,7 +227,9 @@
ParWork.propagateInterrupt(e);
throw new AlreadyClosedException(e);
} catch (Exception e) {
- log.error("Exception handling request", e);
+ if (log.isDebugEnabled() && !(e instanceof PrepRecoveryOp.NotValidLeader)) {
+ log.error("Exception handling request", e);
+ }
if (req.getCore() != null) {
boolean isTragic = req.getCore().getCoreContainer().checkTragicException(req.getCore());
if (isTragic) {
@@ -255,6 +258,11 @@
}
}
+ if (e instanceof PrepRecoveryOp.NotValidLeader) {
+ isServerError = false;
+ incrementErrors = false;
+ }
+
rsp.setException(e);
if (incrementErrors) {
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/ColStatus.java b/solr/core/src/java/org/apache/solr/handler/admin/ColStatus.java
index 260e8e0..1ce860c 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/ColStatus.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/ColStatus.java
@@ -139,6 +139,9 @@
case RECOVERING:
recoveringReplicas++;
break;
+ case BUFFERING:
+ recoveringReplicas++;
+ break;
case RECOVERY_FAILED:
recoveryFailedReplicas++;
break;
@@ -156,7 +159,7 @@
sliceMap.add("routingRules", rules);
}
sliceMap.add("replicas", replicaMap);
- Replica leader = zkStateReader.getLeaderRetry(collection, s.getName(), 10000);
+ Replica leader = s.getLeader();
if (leader == null) { // pick the first one
leader = s.getReplicas().size() > 0 ? s.getReplicas().iterator().next() : null;
}
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
index b534a00..adf38e9 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
@@ -387,6 +387,9 @@
public void execute(CallInfo it) throws Exception {
try {
fun.execute(it);
+ } catch (PrepRecoveryOp.NotValidLeader e) {
+ // No need to re-wrap; throw as-is.
+ throw e;
} catch (SolrException | InterruptedException e) {
log.error("Error handling CoreAdmin action", e);
if (e instanceof InterruptedException) {
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java
index b85c530..77a1d81 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/MetricsHistoryHandler.java
@@ -644,7 +644,6 @@
try (ParWork closer = new ParWork(this)) {
closer.collect(knownDbs.values());
- closer.collect(solrClient);
}
IOUtils.closeQuietly(factory);
assert ObjectReleaseTracker.release(this);
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/PrepRecoveryOp.java b/solr/core/src/java/org/apache/solr/handler/admin/PrepRecoveryOp.java
index dee1675..827a002 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/PrepRecoveryOp.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/PrepRecoveryOp.java
@@ -19,6 +19,7 @@
import org.apache.solr.cloud.LeaderElector;
import org.apache.solr.common.ParWork;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CoreAdminParams;
@@ -35,7 +36,7 @@
import java.util.concurrent.atomic.AtomicReference;
-class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
+public class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@Override
@@ -64,12 +65,12 @@
LeaderElector leaderElector = it.handler.coreContainer.getZkController().getLeaderElector(leaderName);
if (leaderElector == null || !leaderElector.isLeader()) {
- throw new IllegalStateException("Not the valid leader (replica=" + leaderName + ")" + (leaderElector == null ? "No leader elector" : "Elector state=" + leaderElector.getState()) +
+ throw new NotValidLeader("Not the valid leader (replica=" + leaderName + ")" + (leaderElector == null ? "No leader elector" : "Elector state=" + leaderElector.getState()) +
" coll=" + collection);
}
if (waitForState == null) {
- log.info("Done checking leader:", cname);
+        log.info("Done checking leader: {}", leaderName);
return;
}
@@ -120,4 +121,11 @@
log.error(error);
}
}
+
+ public static class NotValidLeader extends SolrException {
+
+ public NotValidLeader(String s) {
+ super(ErrorCode.BAD_REQUEST, s);
+ }
+ }
}
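PrepRecoveryOp now throws a dedicated NotValidLeader subclass of SolrException so that RequestHandlerBase, CoreAdminOperation, and HttpSolrCall can recognize an expected leader-change condition and skip error logging, audit events, and server-error accounting. A generic sketch of that typed-exception pattern, with hypothetical names (ServiceException, NotValidLeaderException, HandlerSketch) rather than the Solr classes:

class ServiceException extends RuntimeException {
  final int code;

  ServiceException(int code, String msg) {
    super(msg);
    this.code = code;
  }
}

class NotValidLeaderException extends ServiceException {
  NotValidLeaderException(String msg) {
    super(400, msg);                     // a client-visible, non-server error
  }
}

class HandlerSketch {
  void handle(Runnable op) {
    try {
      op.run();
    } catch (NotValidLeaderException e) {
      // expected during leader changes: no error log, no audit, no error counter
    } catch (RuntimeException e) {
      // anything else keeps the normal error handling path
      throw e;
    }
  }
}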
diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
index d5152ca..4831833 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@@ -71,6 +71,7 @@
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.admin.PrepRecoveryOp;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.ResultContext;
@@ -145,7 +146,7 @@
if (req.getCore().getCoreContainer().isZooKeeperAware() && Boolean.parseBoolean(onlyIfLeader)) {
LeaderElector leaderElector = req.getCore().getCoreContainer().getZkController().getLeaderElector(req.getCore().getName());
if (leaderElector == null || !leaderElector.isLeader()) {
- throw new IllegalStateException("Not the valid leader (replica=" + req.getCore().getName() + ")" + (leaderElector == null ? "No leader elector" : "Elector state=" + leaderElector.getState()) +
+ throw new PrepRecoveryOp.NotValidLeader("Not the valid leader (replica=" + req.getCore().getName() + ")" + (leaderElector == null ? "No leader elector" : "Elector state=" + leaderElector.getState()) +
" coll=" + req.getCore().getCoreContainer().getZkController().getClusterState().getCollectionOrNull(req.getCore().getCoreDescriptor().getCollectionName()));
}
}
diff --git a/solr/core/src/java/org/apache/solr/metrics/SolrCoreMetricManager.java b/solr/core/src/java/org/apache/solr/metrics/SolrCoreMetricManager.java
index f073102..60fb253 100644
--- a/solr/core/src/java/org/apache/solr/metrics/SolrCoreMetricManager.java
+++ b/solr/core/src/java/org/apache/solr/metrics/SolrCoreMetricManager.java
@@ -28,6 +28,7 @@
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoBean;
+import org.apache.solr.search.SolrFieldCacheBean;
/**
* Helper class for managing registration of {@link SolrMetricProducer}'s
diff --git a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
index 324d997..1f1e813 100644
--- a/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
+++ b/solr/core/src/java/org/apache/solr/metrics/SolrMetricManager.java
@@ -31,9 +31,6 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.RejectedExecutionException;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.locks.Lock;
-import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;
@@ -107,9 +104,7 @@
private static final ConcurrentMap<String, MetricRegistry> REGISTRIES = new ConcurrentHashMap<>(12, 0.75f, 1);
- private final Map<String, Map<String, SolrMetricReporter>> reporters = new HashMap<>(32);
-
- private final Lock reportersLock = new ReentrantLock();
+ private final Map<String, Map<String, SolrMetricReporter>> reporters = new ConcurrentHashMap<>(32);
public static final int DEFAULT_CLOUD_REPORTER_PERIOD = 60;
@@ -1044,32 +1039,23 @@
}
private void registerReporter(String registry, String name, String tag, SolrMetricReporter reporter) throws Exception {
- try {
- if (!reportersLock.tryLock(10, TimeUnit.SECONDS)) {
- throw new Exception("Could not obtain lock to modify reporters registry: " + registry);
+ Map<String,SolrMetricReporter> perRegistry = reporters.get(registry);
+ if (perRegistry == null) {
+ perRegistry = new ConcurrentHashMap<>();
+ Map<String,SolrMetricReporter> existingRegistry = reporters.putIfAbsent(registry, perRegistry);
+ if (existingRegistry != null) {
+ perRegistry = existingRegistry;
}
- } catch (InterruptedException e) {
- ParWork.propagateInterrupt(e);
- throw new Exception("Interrupted while trying to obtain lock to modify reporters registry: " + registry);
}
- try {
- Map<String, SolrMetricReporter> perRegistry = reporters.get(registry);
- if (perRegistry == null) {
- perRegistry = new HashMap<>();
- reporters.put(registry, perRegistry);
- }
- if (tag != null && !tag.isEmpty()) {
- name = name + "@" + tag;
- }
- SolrMetricReporter oldReporter = perRegistry.get(name);
- if (oldReporter != null) { // close it
- log.info("Replacing existing reporter '{}' in registry'{}': {}", name, registry, oldReporter);
- oldReporter.close();
- }
- perRegistry.put(name, reporter);
+ if (tag != null && !tag.isEmpty()) {
+ name = name + "@" + tag;
+ }
- } finally {
- reportersLock.unlock();
+ SolrMetricReporter oldReporter = perRegistry.put(name, reporter);
+
+ if (oldReporter != null) { // close it
+      log.info("Replacing existing reporter '{}' in registry '{}': {}", name, registry, oldReporter);
+ oldReporter.close();
}
}
@@ -1085,37 +1071,24 @@
public boolean closeReporter(String registry, String name, String tag) {
// make sure we use a name with prefix
registry = enforcePrefix(registry);
- try {
- if (!reportersLock.tryLock(10, TimeUnit.SECONDS)) {
- log.warn("Could not obtain lock to modify reporters registry: {}", registry);
- return false;
- }
- } catch (InterruptedException e) {
- ParWork.propagateInterrupt(e);
- log.warn("Interrupted while trying to obtain lock to modify reporters registry: {}", registry);
+
+ Map<String,SolrMetricReporter> perRegistry = reporters.get(registry);
+ if (perRegistry == null) {
+ return false;
+ }
+ if (tag != null && !tag.isEmpty()) {
+ name = name + "@" + tag;
+ }
+ SolrMetricReporter reporter = perRegistry.remove(name);
+ if (reporter == null) {
return false;
}
try {
- Map<String, SolrMetricReporter> perRegistry = reporters.get(registry);
- if (perRegistry == null) {
- return false;
- }
- if (tag != null && !tag.isEmpty()) {
- name = name + "@" + tag;
- }
- SolrMetricReporter reporter = perRegistry.remove(name);
- if (reporter == null) {
- return false;
- }
- try {
- reporter.close();
- } catch (Exception e) {
- log.warn("Error closing metric reporter, registry={}, name={}", registry, name, e);
- }
- return true;
- } finally {
- reportersLock.unlock();
+ reporter.close();
+ } catch (Exception e) {
+ log.warn("Error closing metric reporter, registry={}, name={}", registry, name, e);
}
+ return true;
}
/**
@@ -1142,34 +1115,29 @@
List<SolrMetricReporter> closeReporters = new ArrayList<>();
// make sure we use a name with prefix
registry = enforcePrefix(registry);
- try {
- reportersLock.lock();
- if (log.isDebugEnabled()) log.debug("Closing metric reporters for registry=" + registry + ", tag=" + tag);
- // MRM TODO:
- Map<String,SolrMetricReporter> perRegistry = reporters.get(registry);
- if (perRegistry != null) {
- Set<String> names = new HashSet<>(perRegistry.keySet());
+ if (log.isDebugEnabled()) log.debug("Closing metric reporters for registry=" + registry + ", tag=" + tag);
+ // MRM TODO:
+ Map<String,SolrMetricReporter> perRegistry = reporters.get(registry);
+ if (perRegistry != null) {
- names.forEach(name -> {
- if (tag != null && !tag.isEmpty() && !name.endsWith("@" + tag)) {
- return;
- }
- SolrMetricReporter reporter = perRegistry.remove(name);
- closeReporters.add(reporter);
- removed.add(name);
- });
- if (removed.size() == names.size()) {
- reporters.remove(registry);
+ perRegistry.keySet().forEach(name -> {
+ if (tag != null && !tag.isEmpty() && !name.endsWith("@" + tag)) {
+ return;
}
- }
+ SolrMetricReporter reporter = perRegistry.remove(name);
- } finally {
- reportersLock.unlock();
+ closeReporters.add(reporter);
+ removed.add(name);
+ });
+ if (perRegistry.size() == 0) {
+ reporters.remove(registry);
+ }
}
+
if (closeReporters.size() > 0) {
- try (ParWork closer = new ParWork(this, true, false)) {
+ try (ParWork closer = new ParWork(this, true, true)) {
closer.collect("MetricReporters", closeReporters);
}
}
@@ -1185,26 +1153,13 @@
public Map<String, SolrMetricReporter> getReporters(String registry) {
// make sure we use a name with prefix
registry = enforcePrefix(registry);
- try {
- if (!reportersLock.tryLock(10, TimeUnit.SECONDS)) {
- log.warn("Could not obtain lock to modify reporters registry: {}", registry);
- return Collections.emptyMap();
- }
- } catch (InterruptedException e) {
- ParWork.propagateInterrupt(e);
- log.warn("Interrupted while trying to obtain lock to modify reporters registry: {}", registry);
+
+ Map<String,SolrMetricReporter> perRegistry = reporters.get(registry);
+ if (perRegistry == null) {
return Collections.emptyMap();
- }
- try {
- Map<String, SolrMetricReporter> perRegistry = reporters.get(registry);
- if (perRegistry == null) {
- return Collections.emptyMap();
- } else {
- // defensive copy - the original map may change after we release the lock
- return Collections.unmodifiableMap(new HashMap<>(perRegistry));
- }
- } finally {
- reportersLock.unlock();
+ } else {
+ // defensive copy - the original map may change after we release the lock
+ return Collections.unmodifiableMap(new HashMap<>(perRegistry));
}
}
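The SolrMetricManager hunks drop the reportersLock and its 10-second tryLock in favor of a ConcurrentHashMap of per-registry maps, using putIfAbsent to resolve the race when two threads create the inner map at the same time. A self-contained sketch of that lock-free registry, with Reporter as a placeholder type parameter rather than the real reporter class:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

class ReporterRegistry<Reporter extends AutoCloseable> {
  private final Map<String, Map<String, Reporter>> reporters = new ConcurrentHashMap<>();

  void register(String registry, String name, Reporter reporter) throws Exception {
    Map<String, Reporter> perRegistry = reporters.get(registry);
    if (perRegistry == null) {
      perRegistry = new ConcurrentHashMap<>();
      Map<String, Reporter> existing = reporters.putIfAbsent(registry, perRegistry);
      if (existing != null) {
        perRegistry = existing;          // another thread won the race
      }
    }
    Reporter old = perRegistry.put(name, reporter);
    if (old != null) {
      old.close();                       // replace-and-close, as in the diff
    }
  }

  boolean closeReporter(String registry, String name) throws Exception {
    Map<String, Reporter> perRegistry = reporters.get(registry);
    if (perRegistry == null) {
      return false;
    }
    Reporter reporter = perRegistry.remove(name);
    if (reporter == null) {
      return false;
    }
    reporter.close();
    return true;
  }
}

computeIfAbsent would express the same thing more compactly; the explicit putIfAbsent mirrors the shape of the diff.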
diff --git a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
index c9e803a..eaa9381 100644
--- a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
+++ b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java
@@ -49,6 +49,7 @@
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.ContentStreamHandlerBase;
+import org.apache.solr.handler.admin.PrepRecoveryOp;
import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
@@ -233,7 +234,6 @@
solrReq.getContext().put(CoreContainer.class.getName(), cores);
requestType = RequestType.ADMIN;
action = ADMIN;
- ensureStatesAreAtLeastAtClient();
return;
}
@@ -266,7 +266,7 @@
}
if (core == null && log.isDebugEnabled()) {
- log.debug("tried to get core by name {} got {}, existing cores {} found={}", origCorename, core, cores.getAllCoreNames(), core != null);
+ log.debug("tried to get core by name {} got {}, existing cores {} loading={} found={}", origCorename, core, cores.getAllCoreNames(), cores.getLoadedCoreNames(), core != null);
}
if (core != null) {
@@ -350,10 +350,8 @@
solrReq = parser.parse(core, path, req);
}
-
invalidStates = checkStateVersionsAreValid(getCollectionsList(), queryParams.get(CloudSolrClient.STATE_VERSION));
- ensureStatesAreAtLeastAtClient();
addCollectionParamIfNeeded(getCollectionsList());
action = PROCESS;
@@ -370,34 +368,6 @@
action = PASSTHROUGH;
}
- private void ensureStatesAreAtLeastAtClient() throws InterruptedException, TimeoutException {
-// if (cores.isZooKeeperAware()) {
-// if (log.isDebugEnabled()) log.debug("State version for request is {}", queryParams.get(CloudSolrClient.STATE_VERSION));
-// Map<String,Integer> invalidStates = getStateVersions(queryParams.get(CloudSolrClient.STATE_VERSION));
-// if (invalidStates != null) {
-// Set<Map.Entry<String,Integer>> entries = invalidStates.entrySet();
-// for (Map.Entry<String,Integer> entry : entries) {
-// String collection = entry.getKey();
-// Integer version = entry.getValue();
-// if (log.isDebugEnabled()) log.debug("ensure states are at at least client version {} for collection {}", version, collection);
-// DocCollection docCollection = cores.getZkController().getZkStateReader().getClusterState().getCollectionOrNull(collection);
-// if (docCollection != null && docCollection.getZNodeVersion() < version) {
-// cores.getZkController().getZkStateReader().waitForState(collection, 5, TimeUnit.SECONDS, (liveNodes, collectionState) -> {
-// if (collectionState == null) {
-// return false;
-// }
-// log.info("found server state version {}", collectionState.getZNodeVersion());
-// if (collectionState.getZNodeVersion() < version) {
-// return false;
-// }
-// return true;
-// });
-// }
-// }
-// }
-// }
- }
-
protected void autoCreateSystemColl(String corename) throws Exception {
if (core == null &&
SYSTEM_COLL.equals(corename) &&
@@ -656,7 +626,7 @@
default: return action;
}
} catch (Throwable ex) {
- if (shouldAudit(EventType.ERROR)) {
+ if (!(ex instanceof PrepRecoveryOp.NotValidLeader) && shouldAudit(EventType.ERROR)) {
cores.getAuditLoggerPlugin().doAudit(new AuditEvent(EventType.ERROR, ex, req));
}
sendError(ex);
@@ -794,14 +764,6 @@
listener.getInputStream().transferTo(response.getOutputStream());
-// try {
-// listener.await(60, TimeUnit.SECONDS); // MRM TODO: timeout
-// } catch (InterruptedException e) {
-// log.error("Interrupted waiting for proxy request");
-// } catch (TimeoutException e) {
-// throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Timeout proxying request");
-// }
-
if (failException.get() != null) {
sendError(failException.get());
}
@@ -1034,19 +996,6 @@
/** Returns null if the state ({@link CloudSolrClient#STATE_VERSION}) is good; otherwise returns state problems. */
private Map<String, Integer> checkStateVersionsAreValid(List<String> collectionsList, String stateVer) {
- // TODO: for collections that are local and watched, we should just wait for the right min state, not eager fetch everything
-// Set<String> colList = cores.getZkController().getZkStateReader().getClusterState().getCollectionsMap().keySet();
-// if ((stateVer == null || stateVer.isEmpty()) && cores.isZooKeeperAware()) {
-// StringBuilder sb = new StringBuilder();
-// for (String collection : colList) {
-// if (sb.length() > 0) {
-// sb.append("|");
-// }
-// sb.append(collection + ":0>0");
-// }
-// stateVer = sb.toString();
-// }
-
Map<String, Integer> result = null;
String[] pairs;
if (stateVer != null && !stateVer.isEmpty() && cores.isZooKeeperAware()) {
@@ -1110,55 +1059,54 @@
return null;
}
- try {
- zkStateReader.waitForActiveCollection(collectionName, 10000, TimeUnit.MILLISECONDS, true, collection.getSlices().size(), collection.getReplicas().size(), false);
- } catch (Exception e) {
- log.warn("Did not find leaders for collection:" + collection.getName());
- }
-
if (isPreferLeader) {
List<Replica> leaderReplicas = collection.getLeaderReplicas(cores.getZkController().getNodeName());
log.debug("preferLeader leaderReplicas={}", leaderReplicas);
- SolrCore core = randomlyGetSolrCore(cores.getZkController().getZkStateReader().getLiveNodes(), leaderReplicas, true);
+ SolrCore core = randomlyGetSolrCore(leaderReplicas, true);
if (core != null) return core;
}
List<Replica> replicas = collection.getReplicas(cores.getZkController().getNodeName());
if (log.isDebugEnabled()) log.debug("replicas for node {} {}", replicas, cores.getZkController().getNodeName());
- SolrCore returnCore = randomlyGetSolrCore(cores.getZkController().getZkStateReader().getLiveNodes(), replicas, true);
+ SolrCore returnCore = randomlyGetSolrCore(replicas, true);
if (log.isDebugEnabled()) log.debug("returning core by collection {}", returnCore == null ? null : returnCore.getName());
return returnCore;
}
- private SolrCore randomlyGetSolrCore(Set<String> liveNodes, List<Replica> replicas, boolean checkActive) {
+ private SolrCore randomlyGetSolrCore(List<Replica> replicas, boolean checkActive) {
if (replicas != null) {
RandomIterator<Replica> it = new RandomIterator<>(random, replicas);
while (it.hasNext()) {
Replica replica = it.next();
- if (liveNodes.contains(replica.getNodeName())) {
- SolrCore core = checkProps(replica);
- if (core != null && checkActive && replica.getState() != Replica.State.ACTIVE) {
- try {
- cores.getZkController().getZkStateReader().waitForState(core.getCoreDescriptor().getCollectionName(), 1, TimeUnit.SECONDS, (liveNodes1, coll) -> {
- if (coll == null) {
- return false;
- }
- Replica rep = coll.getReplica(core.getName());
- if (rep == null) {
- return false;
- }
- if (rep.getState() != Replica.State.ACTIVE) {
- return false;
- }
+
+ SolrCore core = checkProps(replica);
+ if (core != null && checkActive) {
+ try {
+ cores.getZkController().getZkStateReader().waitForState(core.getCoreDescriptor().getCollectionName(), 1, TimeUnit.SECONDS, (liveNodes1, coll) -> {
+ if (coll == null) {
+ return false;
+ }
+ Replica rep = coll.getReplica(replica.getName());
+ if (rep == null) {
+ return false;
+ }
+ if (rep.getState() == Replica.State.ACTIVE) {
return true;
- });
- } catch (InterruptedException e) {
- } catch (TimeoutException e) { }
+ }
+ return false;
+ });
+ } catch (InterruptedException e) {
+ log.debug("interrupted waiting to see active replica");
+ return null;
+ } catch (TimeoutException e) {
+ log.debug("timeout waiting to see active replica {} {}", replica.getName(), replica.getState());
+ return null;
}
- if (core != null) return core;
+ return core;
}
}
}
+
return null;
}
@@ -1181,13 +1129,8 @@
if (docCollection == null) {
return null;
}
- Collection<Slice> slices = docCollection.getActiveSlices();
- if (slices.isEmpty()) {
- return null;
- }
-
- String coreUrl = getCoreUrl(slices);
+ String coreUrl = getCoreUrl(docCollection.getSlices());
if (log.isDebugEnabled()) {
log.debug("get remote core url returning {} for {} {}", coreUrl, collectionName, origCorename);
@@ -1203,7 +1146,9 @@
Collections.shuffle(randomizedReplicas, random);
for (Replica replica : randomizedReplicas) {
- if (cores.getZkController().zkStateReader.getLiveNodes().contains(replica.getNodeName())
+ log.debug("check replica {} with node name {} against live nodes {} with state {}",
+ replica.getName(), replica.getNodeName(), cores.getZkController().getZkStateReader().getLiveNodes(), replica.getState());
+ if (!replica.getNodeName().equals(cores.getZkController().getNodeName()) && cores.getZkController().zkStateReader.getLiveNodes().contains(replica.getNodeName())
&& replica.getState() == Replica.State.ACTIVE) {
coreUrl = replica.getCoreUrl();
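randomlyGetSolrCore above now checks every candidate replica and gives it a short, bounded window to reach ACTIVE via ZkStateReader.waitForState before giving up. The stand-in below shows only the bounded-wait shape with a plain polling loop and assumed names; the real code is driven by ZooKeeper watches, not polling:

import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.Supplier;

final class AwaitActive {
  static void await(Supplier<Boolean> isActive, long timeout, TimeUnit unit)
      throws InterruptedException, TimeoutException {
    long deadline = System.nanoTime() + unit.toNanos(timeout);
    while (!isActive.get()) {
      if (System.nanoTime() >= deadline) {
        throw new TimeoutException("replica did not become active in time");
      }
      Thread.sleep(50);                  // polling only for the sketch
    }
  }
}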
diff --git a/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java b/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
index 85d1e48..db32fe1 100644
--- a/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
+++ b/solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
@@ -124,13 +124,22 @@
}
@Override
- public RefCounted<IndexWriter> getIndexWriter(SolrCore core)
+ public RefCounted<IndexWriter> getIndexWriter(SolrCore core) throws IOException {
+ return getIndexWriter(core, false);
+ }
+
+ @Override
+ public RefCounted<IndexWriter> getIndexWriter(SolrCore core, boolean createIndex)
throws IOException {
if (core != null && (!core.indexEnabled || core.readOnly)) {
throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
"Indexing is temporarily disabled");
}
+ if (core != null && core.isClosing()) {
+ throw new AlreadyClosedException();
+ }
+
boolean succeeded = false;
lock(iwLock.readLock());
try {
@@ -142,7 +151,10 @@
if (refCntWriter == null) return null;
} else {
if (indexWriter == null) {
- indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2");
+          if (core != null && (core.isClosing() || core.isClosed() || core.getCoreContainer().isShutDown())) {
+ throw new AlreadyClosedException();
+ }
+ indexWriter = createMainIndexWriter(core, createIndex,"DirectUpdateHandler2");
}
initRefCntWriter();
}
@@ -195,7 +207,7 @@
}
// closes and opens index writers without any locking
- private void changeWriter(SolrCore core, boolean rollback, boolean openNewWriter) throws IOException {
+ private void changeWriter(SolrCore core, boolean rollback, boolean createIndex, boolean openNewWriter) throws IOException {
String coreName = core.getName();
// We need to null this so it picks up the new writer next get call.
@@ -224,7 +236,7 @@
}
if (openNewWriter) {
- indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2");
+ indexWriter = createMainIndexWriter(core, createIndex, "DirectUpdateHandler2");
log.info("New IndexWriter is ready to be used.");
}
}
@@ -233,7 +245,17 @@
public void newIndexWriter(SolrCore core, boolean rollback) throws IOException {
lock(iwLock.writeLock());
try {
- changeWriter(core, rollback, true);
+ changeWriter(core, rollback, false, true);
+ } finally {
+ iwLock.writeLock().unlock();
+ }
+ }
+
+ @Override
+ public void newIndexWriter(SolrCore core, boolean rollback, boolean createIndex) throws IOException {
+ lock(iwLock.writeLock());
+ try {
+ changeWriter(core, rollback, createIndex, true);
} finally {
iwLock.writeLock().unlock();
}
@@ -242,14 +264,14 @@
@Override
public void closeIndexWriter(SolrCore core, boolean rollback) throws IOException {
lock(iwLock.writeLock());
- changeWriter(core, rollback, false);
+ changeWriter(core, rollback, false,false);
// Do not unlock the writeLock in this method. It will be unlocked by the openIndexWriter call (see base class javadoc)
}
@Override
public void openIndexWriter(SolrCore core) throws IOException {
try {
- changeWriter(core, false, true);
+ changeWriter(core, false, false, true);
} finally {
iwLock.writeLock().unlock(); //unlock even if we failed
}
@@ -259,16 +281,16 @@
public void rollbackIndexWriter(SolrCore core) throws IOException {
iwLock.writeLock().lock();
try {
- changeWriter(core, true, true);
+ changeWriter(core, true, false, true);
} finally {
iwLock.writeLock().unlock();
}
}
- protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name) throws IOException {
+ protected SolrIndexWriter createMainIndexWriter(SolrCore core, boolean createIndex, String name) throws IOException {
SolrIndexWriter iw;
try {
- iw = SolrIndexWriter.buildIndexWriter(core, name, core.getNewIndexDir(), core.getDirectoryFactory(), false, core.getLatestSchema(),
+ iw = SolrIndexWriter.buildIndexWriter(core, name, core.getNewIndexDir(), core.getDirectoryFactory(), createIndex, core.getLatestSchema(),
core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec(), false);
} catch (Exception e) {
ParWork.propagateInterrupt(e);
@@ -484,9 +506,9 @@
// a blocking race, we should not need to
// though
// iwLock.writeLock().lock();
- if (recoverying) {
- cancelRecovery(false, true);
- }
+
+ cancelRecovery(false, true);
+
try {
closeIndexWriter(closer);
} finally {
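DefaultSolrCoreState gains a getIndexWriter(core, createIndex) overload, and SolrCore.initIndex earlier in this patch uses it to obtain the shared, ref-counted writer and commit once so readers can see the fresh segments file. A sketch of the acquire/use/decref discipline around such a handle, with illustrative names rather than Solr's RefCounted class:

import java.util.concurrent.atomic.AtomicInteger;

abstract class RefCountedSketch<T> {
  private final AtomicInteger refCount = new AtomicInteger(1);
  private final T resource;

  RefCountedSketch(T resource) {
    this.resource = resource;
  }

  T get() {
    return resource;
  }

  RefCountedSketch<T> incref() {
    refCount.incrementAndGet();
    return this;
  }

  void decref() {
    if (refCount.decrementAndGet() == 0) {
      close();                           // last holder releases the resource
    }
  }

  protected abstract void close();
}

// Typical caller shape, mirroring the initIndex hunk earlier in the patch
// (state.getWriter is a hypothetical accessor):
//
//   RefCountedSketch<Writer> ref = state.getWriter(core, /* createIndex= */ true);
//   try {
//     ref.get().commit();               // make the fresh segments file visible
//   } finally {
//     ref.decref();
//   }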
diff --git a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
index 3094740..4923f47 100644
--- a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
+++ b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@@ -781,7 +781,7 @@
@Override
public void newIndexWriter(boolean rollback) throws IOException {
- solrCoreState.newIndexWriter(core, rollback);
+ solrCoreState.newIndexWriter(core, rollback, false);
}
/**
diff --git a/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java b/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
index 1bfe076..b789540 100644
--- a/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
+++ b/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
@@ -24,6 +24,7 @@
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
@@ -123,7 +124,7 @@
err.t);
// this can happen in certain situations such as close
- if (isRetry && rspCode != -1) {
+ if (isRetry) {
// if it's a io exception exception, lets try again
if (err.t instanceof SolrServerException) {
if (((SolrServerException) err.t).getRootCause() instanceof IOException && !(((SolrServerException) err.t).getRootCause() instanceof ClosedChannelException)) {
@@ -135,13 +136,13 @@
doRetry = true;
}
- if (err.req.retries < maxRetries && doRetry && !isClosed.isClosed()) {
+ if (err.req != null && err.req.retries.get() < maxRetries && doRetry && (isClosed == null || !isClosed.isClosed())) {
try {
- Thread.sleep(100);
+ Thread.sleep(10);
} catch (InterruptedException e) {
}
- err.req.retries++;
+ err.req.retries.incrementAndGet();
SolrException.log(SolrCmdDistributor.log, "sending update to "
+ oldNodeUrl + " failed - retrying ... retries: "
@@ -168,9 +169,9 @@
RollupRequestReplicationTracker rollupTracker,
LeaderRequestReplicationTracker leaderTracker) throws IOException {
if (nodes == null) return;
-// if (!cmd.isDeleteById()) {
-// blockAndDoRetries();
-// }
+ if (!cmd.isDeleteById()) {
+ blockAndDoRetries();
+ }
for (Node node : nodes) {
if (node == null) continue;
UpdateRequest uReq = new UpdateRequest();
@@ -215,7 +216,7 @@
public void distribCommit(CommitUpdateCommand cmd, List<Node> nodes,
ModifiableSolrParams params) {
// we need to do any retries before commit...
- //blockAndDoRetries();
+ blockAndDoRetries();
if (log.isDebugEnabled()) {
log.debug("Distrib commit to: {} params: {}", nodes, params);
}
@@ -350,7 +351,7 @@
public static class Req {
public Node node;
public UpdateRequest uReq;
- public int retries;
+ public AtomicInteger retries;
public boolean synchronous;
public UpdateCommand cmd;
final private RollupRequestReplicationTracker rollupTracker;
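Req.retries becomes an AtomicInteger so that retry bookkeeping done from response callbacks on different threads no longer relies on an unsynchronized int. A minimal sketch of that counter plus the bounded-retry check, under assumed names:

import java.util.concurrent.atomic.AtomicInteger;

class RetriableRequest {
  final AtomicInteger retries = new AtomicInteger();

  boolean shouldRetry(int maxRetries, boolean retriableError, boolean closed) {
    return retriableError && !closed && retries.get() < maxRetries;
  }

  void scheduleRetry() throws InterruptedException {
    retries.incrementAndGet();
    Thread.sleep(10);                    // brief pause before resubmitting, as in the diff
  }
}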
diff --git a/solr/core/src/java/org/apache/solr/update/SolrCoreState.java b/solr/core/src/java/org/apache/solr/update/SolrCoreState.java
index ff874c9..df78e89 100644
--- a/solr/core/src/java/org/apache/solr/update/SolrCoreState.java
+++ b/solr/core/src/java/org/apache/solr/update/SolrCoreState.java
@@ -106,9 +106,10 @@
* @param rollback close IndexWriter if false, else rollback
* @throws IOException If there is a low-level I/O error.
*/
+ public abstract void newIndexWriter(SolrCore core, boolean rollback, boolean createIndex) throws IOException;
+
public abstract void newIndexWriter(SolrCore core, boolean rollback) throws IOException;
-
/**
* Expert method that closes the IndexWriter - you must call {@link #openIndexWriter(SolrCore)}
* in a finally block after calling this method.
@@ -127,14 +128,22 @@
* @throws IOException If there is a low-level I/O error.
*/
public abstract void openIndexWriter(SolrCore core) throws IOException;
-
+
+ /**
+ * Get the current IndexWriter. If a new IndexWriter must be created, use the
+ * settings from the given {@link SolrCore}.
+ *
+ * @throws IOException If there is a low-level I/O error.
+ */
+ public abstract RefCounted<IndexWriter> getIndexWriter(SolrCore core) throws IOException;
+
/**
* Get the current IndexWriter. If a new IndexWriter must be created, use the
* settings from the given {@link SolrCore}.
*
* @throws IOException If there is a low-level I/O error.
*/
- public abstract RefCounted<IndexWriter> getIndexWriter(SolrCore core) throws IOException;
+ public abstract RefCounted<IndexWriter> getIndexWriter(SolrCore core, boolean createIndex) throws IOException;
/**
* Rollback the current IndexWriter. When creating the new IndexWriter use the
@@ -201,7 +210,7 @@
public abstract Lock getRecoveryLock();
public Throwable getTragicException() throws IOException {
- RefCounted<IndexWriter> ref = getIndexWriter(null);
+ RefCounted<IndexWriter> ref = getIndexWriter(null, false);
if (ref == null) return null;
try {
return ref.get().getTragicException();
diff --git a/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java b/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java
index 7e7b01a..d95e90d 100644
--- a/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java
+++ b/solr/core/src/java/org/apache/solr/update/UpdateShardHandler.java
@@ -103,7 +103,6 @@
Set<String> queryParams = new HashSet<>(2);
queryParams.add(DistributedUpdateProcessor.DISTRIB_FROM);
queryParams.add(DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM);
- queryParams.add(BaseCloudSolrClient.STATE_VERSION);
updateOnlyClient.setQueryParams(queryParams);
diff --git a/solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java
index e59b421..c993671 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java
@@ -164,8 +164,30 @@
@Override
public void processCommit(CommitUpdateCommand cmd) throws IOException {
Replica leaderReplica;
+
+
+
+ DocCollection coll = clusterState.getCollection(collection);
+
+ Slice slice = coll.getSlice(desc.getCloudDescriptor().getShardId());
+
+ String shardId = slice.getName();
+
try {
- leaderReplica = zkController.getZkStateReader().getLeaderRetry(collection, desc.getCloudDescriptor().getShardId(), 1000);
+
+ // Not equivalent to getLeaderProps, which retries to find a leader.
+ leaderReplica = slice.getLeader();
+ if (leaderReplica == null) {
+ leaderReplica = zkController.getZkStateReader().getLeaderRetry(collection, shardId, 10000);
+ } else {
+ isLeader = leaderReplica.getName().equals(desc.getName());
+ if (isLeader) {
+ LeaderElector leaderElector = req.getCore().getCoreContainer().getZkController().getLeaderElector(req.getCore().getName());
+ if (leaderElector == null || !leaderElector.isLeader()) {
+ leaderReplica = zkController.getZkStateReader().getLeaderRetry(req.getCore().getCoreContainer().getUpdateShardHandler().getTheSharedHttpClient(), collection, shardId, 10000, true);
+ }
+ }
+ }
} catch (Exception e) {
ParWork.propagateInterrupt(e);
throw new SolrException(ErrorCode.SERVER_ERROR, "Exception finding leader for shard " + cloudDesc.getShardId(), e);
@@ -231,9 +253,11 @@
}
}
if (removeNode != null) {
+ log.debug("remove leader node since we will do a local commit now {}", leaderReplica);
useNodes.remove(removeNode);
sendCommitToReplicasAndLocalCommit(cmd, worker, leaderReplica.getName(), params);
+ if (log.isDebugEnabled()) log.debug("processCommit(CommitUpdateCommand) - end");
}
}
@@ -288,7 +312,8 @@
worker.collect("localCommit", () -> {
try {
doLocalCommit(cmd);
- } catch (IOException e) {
+ } catch (Exception e) {
+ log.error("Failed local leader commit", e);
throw new SolrException(ErrorCode.SERVER_ERROR, e);
}
});
@@ -605,8 +630,12 @@
throw new SolrException(ErrorCode.SERVER_ERROR, "error getting leader", e);
}
// DBQ forwarded to NRT and TLOG replicas
+ Set<Replica.State> matchFilters = new HashSet<>(3);
+ matchFilters.add(Replica.State.BUFFERING);
+ matchFilters.add(Replica.State.RECOVERING);
+ matchFilters.add(Replica.State.ACTIVE);
List<Replica> replicaProps = zkController.getZkStateReader()
- .getReplicaProps(collection, myShardId, leaderReplica.getName(), Replica.State.BUFFERING, Replica.State.ACTIVE, EnumSet.of(Replica.Type.NRT, Replica.Type.TLOG));
+ .getReplicaProps(collection, myShardId, leaderReplica.getName(), matchFilters, EnumSet.of(Replica.Type.NRT, Replica.Type.TLOG));
if (replicaProps != null) {
final List<SolrCmdDistributor.Node> myReplicas = new ArrayList<>(replicaProps.size());
for (Replica replicaProp : replicaProps) {
@@ -648,8 +677,12 @@
// TODO: what if we are no longer the leader?
forwardToLeader = false;
+ Set<Replica.State> matchFilters = new HashSet<>(3);
+ matchFilters.add(Replica.State.BUFFERING);
+ matchFilters.add(Replica.State.RECOVERING);
+ matchFilters.add(Replica.State.ACTIVE);
List<Replica> replicaProps = zkController.getZkStateReader()
- .getReplicaProps(collection, shardId, name, Replica.State.BUFFERING, Replica.State.ACTIVE, EnumSet.of(Replica.Type.NRT, Replica.Type.TLOG));
+ .getReplicaProps(collection, shardId, name, matchFilters, EnumSet.of(Replica.Type.NRT, Replica.Type.TLOG));
if (replicaProps != null) {
nodes = new ArrayList<>(replicaProps.size());
for (Replica props : replicaProps) {
@@ -775,13 +808,23 @@
Replica leaderReplica;
try {
- doDefensiveChecks(phase);
-
// Not equivalent to getLeaderProps, which retries to find a leader.
leaderReplica = slice.getLeader();
-// leaderReplica = zkController.getZkStateReader().getLeaderRetry(collection, shardId, 10000);
+ if (leaderReplica == null) {
+ leaderReplica = zkController.getZkStateReader().getLeaderRetry(req.getCore().getCoreContainer().getUpdateShardHandler().getTheSharedHttpClient(), collection, shardId, 10000, true);
+ } else {
+ isLeader = leaderReplica.getName().equals(desc.getName());
+ if (isLeader) {
+ LeaderElector leaderElector = req.getCore().getCoreContainer().getZkController().getLeaderElector(req.getCore().getName());
+ if (leaderElector == null || !leaderElector.isLeader()) {
+ leaderReplica = zkController.getZkStateReader().getLeaderRetry(req.getCore().getCoreContainer().getUpdateShardHandler().getTheSharedHttpClient(), collection, shardId, 10000, true);
+ }
+ }
+ }
isLeader = leaderReplica != null && leaderReplica.getName().equals(desc.getName());
+ doDefensiveChecks(phase);
+
if (!isLeader) {
isSubShardLeader = amISubShardLeader(coll, slice, id, doc);
if (isSubShardLeader) {
@@ -827,7 +870,7 @@
} catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR, e);
}
- for (Replica replica: replicas) {
+ for (Replica replica : replicas) {
String coreNodeName = replica.getName();
if (skipList != null && skipListSet.contains(replica.getCoreUrl())) {
if (log.isInfoEnabled()) {
@@ -838,14 +881,13 @@
log.info("skip url:{} cause its term is less than leader", replica.getCoreUrl());
skippedCoreNodeNames.add(replica.getName());
- } else if (!zkController.getZkStateReader().getLiveNodes().contains(replica.getNodeName()) || (replica.getState() != Replica.State.ACTIVE &&
- replica.getState() != Replica.State.BUFFERING)) {
+ } else if (!zkController.getZkStateReader().getLiveNodes().contains(replica.getNodeName()) || (replica.getState() == Replica.State.DOWN)) {
skippedCoreNodeNames.add(replica.getName());
} else {
nodes.add(new SolrCmdDistributor.StdNode(zkController.getZkStateReader(), replica, collection, shardId, maxRetriesToFollowers));
}
}
- if (log.isDebugEnabled()) log.debug("We are the leader {}, forward update to replicas.. {} {}", req.getCore().getName(), nodes);
+ if (log.isDebugEnabled()) log.debug("We are the leader {}, forward update to replicas.. {}", req.getCore().getName(), nodes);
return nodes;
} else {
@@ -1154,7 +1196,7 @@
DocCollection docCollection = clusterState.getCollection(collection);
Slice mySlice = docCollection.getSlice(cloudDesc.getShardId());
- if (DistribPhase.TOLEADER == phase) {
+ if (isLeader || DistribPhase.TOLEADER == phase) {
LeaderElector leaderElector = req.getCore().getCoreContainer().getZkController().getLeaderElector(req.getCore().getName());
if (leaderElector == null || !leaderElector.isLeader()) {
throw new IllegalStateException(
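Both processCommit and the leader-routing path above now prefer the leader already cached in cluster state and only fall back to the retrying getLeaderRetry lookup when the cache has no leader, or when cluster state says this core leads but the local LeaderElector disagrees. A compact sketch of that fast-path/slow-path decision, where every name is a hypothetical stand-in for the Solr types:

import java.util.function.Supplier;

class LeaderLookup<R> {
  R resolveLeader(R cachedLeader,
                  boolean cachedLeaderIsThisCore,
                  boolean localElectorSaysLeader,
                  Supplier<R> retryingLookup) {
    if (cachedLeader == null) {
      return retryingLookup.get();       // slow path: ask ZooKeeper with retries
    }
    if (cachedLeaderIsThisCore && !localElectorSaysLeader) {
      // cluster state claims this core leads, but the local elector disagrees:
      // re-resolve rather than trust a possibly stale cache
      return retryingLookup.get();
    }
    return cachedLeader;                 // fast path, no ZooKeeper round trip
  }
}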
diff --git a/solr/core/src/java/org/apache/solr/update/processor/SkipExistingDocumentsProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/SkipExistingDocumentsProcessorFactory.java
index 6ee2d19..05d8a29 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/SkipExistingDocumentsProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/SkipExistingDocumentsProcessorFactory.java
@@ -122,7 +122,7 @@
SolrQueryResponse rsp,
UpdateRequestProcessor next) {
// Ensure the parameters are forwarded to the leader
- DistributedUpdateProcessorFactory.addParamToDistributedRequestWhitelist(req, PARAM_SKIP_INSERT_IF_EXISTS, PARAM_SKIP_UPDATE_IF_MISSING, BaseCloudSolrClient.STATE_VERSION);
+ DistributedUpdateProcessorFactory.addParamToDistributedRequestWhitelist(req, PARAM_SKIP_INSERT_IF_EXISTS, PARAM_SKIP_UPDATE_IF_MISSING);
// Allow the particular request to override the plugin's configured behaviour
boolean skipInsertForRequest = req.getOriginalParams().getBool(PARAM_SKIP_INSERT_IF_EXISTS, this.skipInsertIfExists);
diff --git a/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessorFactory.java
index 07a434c..6c3123c 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/TolerantUpdateProcessorFactory.java
@@ -131,7 +131,7 @@
return next;
}
- DistributedUpdateProcessorFactory.addParamToDistributedRequestWhitelist(req, MAX_ERRORS_PARAM, BaseCloudSolrClient.STATE_VERSION);
+ DistributedUpdateProcessorFactory.addParamToDistributedRequestWhitelist(req, MAX_ERRORS_PARAM);
int maxErrors = req.getParams().getInt(MAX_ERRORS_PARAM, defaultMaxErrors);
if (maxErrors < -1) {
throw new SolrException(ErrorCode.BAD_REQUEST, "'"+MAX_ERRORS_PARAM + "' must either be non-negative, or -1 to indicate 'unlimited': " + maxErrors);
diff --git a/solr/core/src/java/org/apache/solr/util/TestInjection.java b/solr/core/src/java/org/apache/solr/util/TestInjection.java
index e590710..2a40466 100644
--- a/solr/core/src/java/org/apache/solr/util/TestInjection.java
+++ b/solr/core/src/java/org/apache/solr/util/TestInjection.java
@@ -120,7 +120,7 @@
public volatile static String randomDelayInCoreCreation = null;
- public volatile static int randomDelayMaxInCoreCreationInSec = 10;
+ public volatile static int randomDelayMaxInCoreCreationInSec = 5;
public volatile static String splitFailureBeforeReplicaCreation = null;
diff --git a/solr/core/src/test/org/apache/solr/cloud/AssignBackwardCompatibilityTest.java b/solr/core/src/test/org/apache/solr/cloud/AssignBackwardCompatibilityTest.java
index 26bcc01..83d5c19 100644
--- a/solr/core/src/test/org/apache/solr/cloud/AssignBackwardCompatibilityTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/AssignBackwardCompatibilityTest.java
@@ -21,6 +21,7 @@
import java.lang.invoke.MethodHandles;
import java.util.HashSet;
import java.util.Set;
+import java.util.concurrent.TimeoutException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.SolrTestUtil;
@@ -58,7 +59,7 @@
}
@Test
- public void test() throws IOException, SolrServerException, KeeperException, InterruptedException {
+ public void test() throws IOException, SolrServerException, KeeperException, InterruptedException, TimeoutException {
Set<String> coreNames = new HashSet<>();
int numOperations = random().nextInt(15) + 15;
diff --git a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java
index ac2be7f..b7f7e67 100644
--- a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java
+++ b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java
@@ -52,8 +52,8 @@
@LuceneTestCase.Nightly // MRM TODO: - check out more, convert to bridge
@Ignore // MRM TODO: convert to bridge
public class BasicDistributedZk2Test extends AbstractFullDistribZkTestBase {
- private static final String SHARD2 = "shard2";
- private static final String SHARD1 = "shard1";
+ private static final String SHARD2 = "s2";
+ private static final String SHARD1 = "s1";
private static final String ONE_NODE_COLLECTION = "onenodecollection";
private final boolean onlyLeaderIndexes = random().nextBoolean();
diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java
index 0815c69..ded5587 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java
@@ -37,7 +37,7 @@
import java.util.concurrent.TimeUnit;
@Slow
-@LuceneTestCase.Nightly // MRM TODO: finish compare against control, look at setErrorHook
+@LuceneTestCase.Nightly // MRM TODO: look at setErrorHook
public class ChaosMonkeySafeLeaderTest extends SolrCloudBridgeTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -137,7 +137,7 @@
indexThread.start();
}
- chaosMonkey.startTheMonkey(false, 500);
+ chaosMonkey.startTheMonkey(false, 5000);
try {
long runLength;
if (RUN_LENGTH != -1) {
@@ -171,20 +171,34 @@
assertTrue(String.valueOf(indexThread.getFailCount()), indexThread.getFailCount() < 10);
}
- cluster.getSolrClient().getZkStateReader().waitForState(COLLECTION, 15, TimeUnit.SECONDS, (liveNodes, collectionState) -> {
- if (collectionState == null) return false;
- Collection<Slice> slices = collectionState.getSlices();
- for (Slice slice : slices) {
- for (Replica replica : slice.getReplicas()) {
- if (cluster.getSolrClient().getZkStateReader().isNodeLive(replica.getNodeName())) {
+ while (true) {
+ cluster.getSolrClient().getZkStateReader().waitForState(COLLECTION, 15, TimeUnit.SECONDS, (liveNodes, collectionState) -> {
+ if (collectionState == null) return false;
+ Collection<Slice> slices = collectionState.getSlices();
+ for (Slice slice : slices) {
+ for (Replica replica : slice.getReplicas()) {
+ if (cluster.getSolrClient().getZkStateReader().isNodeLive(replica.getNodeName())) {
if (replica.getState() != Replica.State.ACTIVE) {
return false;
}
+ }
}
}
+ return true;
+ });
+
+ Collection<Slice> slices = cluster.getSolrClient().getZkStateReader().getCollectionOrNull(COLLECTION).getSlices();
+ try {
+ for (Slice slice : slices) {
+ cluster.getSolrClient().getZkStateReader().getLeaderRetry(cluster.getSolrClient().getHttpClient(), COLLECTION, slice.getName(), 5000, true);
+ }
+ break;
+ } catch (Exception e) {
+ log.error("exception waiting for leaders", e);
+ Thread.sleep(150);
+ continue;
}
- return true;
- });
+ }
commit();
diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionStateZnodeTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionStateZnodeTest.java
index 3d0b339..c637fc1 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CollectionStateZnodeTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CollectionStateZnodeTest.java
@@ -20,12 +20,16 @@
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.data.Stat;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
// MRM TODO: - speed this up - waits for zkwriter to see its own state after delete
public class CollectionStateZnodeTest extends SolrCloudTestCase {
@@ -54,9 +58,21 @@
Stat stat = new Stat();
zkClient().getData(ZkStateReader.getCollectionPath(collectionName), null, stat);
- DocCollection c = getCollectionState(collectionName);
+ // the state.json itself can be ahead of the local DocCollection version due to state updates filling it in
+ try {
+ cluster.getSolrClient().getZkStateReader().waitForState(collectionName, 3, TimeUnit.SECONDS, (liveNodes, collectionState) -> {
+ if (collectionState == null) {
+ return false;
+ }
+ if (collectionState.getZNodeVersion() != stat.getVersion() && !collectionState.getStateUpdates().get("_cs_ver_").equals(Integer.toString(stat.getVersion()))) {
+ return false;
+ }
+ return true;
+ });
+ } catch (TimeoutException e) {
+ fail("failed finding state in DocCollection that appears up to date with " + stat.getVersion());
+ }
- assertEquals("DocCollection version should equal the znode version", stat.getVersion(), c.getZNodeVersion() );
// remove collection
CollectionAdminRequest.deleteCollection(collectionName).process(cluster.getSolrClient());
diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
index 45be93a..b59505b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
@@ -89,7 +89,6 @@
System.setProperty("solr.zkclienttimeout", "15000");
System.setProperty("zkClientTimeout", "15000");
- System.setProperty("solr.getleader.looptimeout", "10000");
String timeout = "640000";
System.setProperty("solr.http2solrclient.default.idletimeout", timeout);
System.setProperty("distribUpdateSoTimeout", timeout);
diff --git a/solr/core/src/test/org/apache/solr/cloud/ConfigSetsAPITest.java b/solr/core/src/test/org/apache/solr/cloud/ConfigSetsAPITest.java
index 7de4fdb..36b7a26 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ConfigSetsAPITest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ConfigSetsAPITest.java
@@ -68,8 +68,7 @@
}
@Test
- @LuceneTestCase.Nightly // TODO speedup
- @Ignore // MRM TODO:
+ // @LuceneTestCase.Nightly // TODO speedup
public void testSharedSchema() throws Exception {
CollectionAdminRequest.createCollection("col1", "cShare", 1, 1)
.processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT);
@@ -80,10 +79,10 @@
CoreContainer coreContainer = cluster.getJettySolrRunner(0).getCoreContainer();
- try (SolrCore coreCol1 = coreContainer.getCore("col1_s1_r1");
- SolrCore coreCol2 = coreContainer.getCore("col2_s1_r1");
- SolrCore coreCol3 = coreContainer.getCore("col3_s1_r1")) {
- assertSame(coreCol1.getLatestSchema(), coreCol2.getLatestSchema());
+ try (SolrCore coreCol1 = coreContainer.getCore("col1_s1_r_n1");
+ SolrCore coreCol2 = coreContainer.getCore("col2_s1_r_n1");
+ SolrCore coreCol3 = coreContainer.getCore("col3_s1_r_n1")) {
+ assertSame(coreContainer.getAllCoreNames().toString(), coreCol1.getLatestSchema(), coreCol2.getLatestSchema());
assertNotSame(coreCol1.getLatestSchema(), coreCol3.getLatestSchema());
}
@@ -92,8 +91,8 @@
SolrTestCaseJ4.map("collection.configName", "conf1") // from cShare
).processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT);
- try (SolrCore coreCol1 = coreContainer.getCore("col1_shard1_replica_n1");
- SolrCore coreCol2 = coreContainer.getCore("col2_shard1_replica_n1")) {
+ try (SolrCore coreCol1 = coreContainer.getCore("col1_s1_r_n1");
+ SolrCore coreCol2 = coreContainer.getCore("col2_s1_r_n1")) {
assertNotSame(coreCol1.getLatestSchema(), coreCol2.getLatestSchema());
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteInactiveReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteInactiveReplicaTest.java
index 5b5d462..c9265e7 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteInactiveReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteInactiveReplicaTest.java
@@ -94,7 +94,7 @@
// CoreAdminRequest.Create createRequest = new CoreAdminRequest.Create();
// createRequest.setCoreName("testcore");
// createRequest.setCollection(collectionName);
-// createRequest.setShardId("shard2");
+// createRequest.setShardId("s2");
// queryClient.request(createRequest);
// });
// assertTrue("Unexpected error message: " + e.getMessage(), e.getMessage().contains("No coreNodeName for"));
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteNodeTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteNodeTest.java
index 3bec335..b9c2365 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteNodeTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteNodeTest.java
@@ -24,7 +24,6 @@
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.util.StrUtils;
import org.junit.BeforeClass;
-import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -34,7 +33,6 @@
import java.util.Collections;
import java.util.Set;
-@Ignore // MRM TODO: flakey
public class DeleteNodeTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -68,14 +66,14 @@
CollectionAdminRequest.createCollection(coll, "conf1", 5, 2, 0, 0),
CollectionAdminRequest.createCollection(coll, "conf1", 5, 2, 1, 0)
);
- create = create.setCreateNodeSet(StrUtils.join(l, ',')).setMaxShardsPerNode(20);
+ create = create.setCreateNodeSet(StrUtils.join(l, ',')).setMaxShardsPerNode(20).waitForFinalState(true);
cloudClient.request(create);
state = cloudClient.getZkStateReader().getClusterState();
String node2bdecommissioned = l.get(0);
// check what replicas are on the node, and whether the call should fail
//new CollectionAdminRequest.DeleteNode(node2bdecommissioned).processAsync("003", cloudClient);
- new CollectionAdminRequest.DeleteNode(node2bdecommissioned).process(cloudClient);
+ new CollectionAdminRequest.DeleteNode(node2bdecommissioned).waitForFinalState(true).process(cloudClient);
// CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus("003");
CollectionAdminRequest.RequestStatusResponse rsp = null;
// if (shouldFail) {
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
index 4a506ed..92dfddd 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
@@ -49,12 +49,10 @@
import org.apache.solr.common.cloud.ZkStateReaderAccessor;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils;
-import org.apache.solr.core.ZkContainer;
import org.apache.solr.util.TimeOut;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
-import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -153,7 +151,7 @@
final String collectionName = "deletereplica_test";
CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2).waitForFinalState(true).process(cluster.getSolrClient());
- Replica leader = cluster.getSolrClient().getZkStateReader().getLeaderRetry(collectionName, "s1", 5000, true);
+ Replica leader = cluster.getSolrClient().getZkStateReader().getLeaderRetry(cluster.getSolrClient().getHttpClient(), collectionName, "s1", 5000, true);
//Confirm that the instance and data directory exist
CoreStatus coreStatus = getCoreStatus(leader);
@@ -166,7 +164,7 @@
log.info("leader was {}", leader);
- Replica newLeader = cluster.getSolrClient().getZkStateReader().getLeaderRetry(collectionName, "s1", 5000, true);
+ Replica newLeader = cluster.getSolrClient().getZkStateReader().getLeaderRetry(cluster.getSolrClient().getHttpClient(), collectionName, "s1", 5000, true);
org.apache.solr.common.util.TimeOut timeOut = new org.apache.solr.common.util.TimeOut(2000, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
while (!timeOut.hasTimedOut()) {
@@ -268,12 +266,10 @@
@Test
@LuceneTestCase.Slow
// commented out on: 17-Feb-2019 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // annotated on: 24-Dec-2018
- @LuceneTestCase.Nightly // TODO look at performance of this - need lower connection timeouts for test?
- @Ignore // MRM TODO:
+ //@LuceneTestCase.Nightly // TODO look at performance of this - need lower connection timeouts for test?
public void raceConditionOnDeleteAndRegisterReplica() throws Exception {
final String collectionName = "raceDeleteReplicaCollection";
- CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2)
- .process(cluster.getSolrClient());
+ CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2).waitForFinalState(true).process(cluster.getSolrClient());
Slice shard1 = getCollectionState(collectionName).getSlice("s1");
Replica leader = shard1.getLeader();
@@ -288,70 +284,56 @@
Semaphore waitingForReplicaGetDeleted = new Semaphore(0);
// for safety, we only want this hook get triggered one time
AtomicInteger times = new AtomicInteger(0);
- ZkContainer.testing_beforeRegisterInZk = cd -> {
- if (cd.getCloudDescriptor() == null) return false;
- if (replica1.getName().equals(cd.getName())
- && collectionName.equals(cd.getCloudDescriptor().getCollectionName())) {
- if (times.incrementAndGet() > 1) {
- return false;
- }
- log.info("Running delete core {}",cd);
-
- try {
- ZkNodeProps m = new ZkNodeProps(
- Overseer.QUEUE_OPERATION, OverseerAction.DELETECORE.toLower(),
- ZkStateReader.CORE_NAME_PROP, replica1.getName(),
- ZkStateReader.NODE_NAME_PROP, replica1.getNodeName(),
- ZkStateReader.COLLECTION_PROP, collectionName);
- cluster.getOpenOverseer().getStateUpdateQueue().offer(Utils.toJSON(m));
-
- boolean replicaDeleted = false;
- TimeOut timeOut = new TimeOut(25, TimeUnit.SECONDS, TimeSource.NANO_TIME);
- while (!timeOut.hasTimedOut()) {
- try {
- ZkStateReader stateReader = replica1Jetty.getCoreContainer().getZkController().getZkStateReader();
- Slice shard = stateReader.getClusterState().getCollection(collectionName).getSlice("s1");
- if (shard.getReplicas().size() == 1) {
- replicaDeleted = true;
- waitingForReplicaGetDeleted.release();
- break;
- }
- Thread.sleep(250);
- } catch (NullPointerException | SolrException e) {
- log.error("", e);
- Thread.sleep(250);
- }
- }
- if (!replicaDeleted) {
- fail("Timeout for waiting replica get deleted");
- }
- } catch (Exception e) {
- log.error("", e);
- fail("Failed to delete replica");
- } finally {
- //avoiding deadlock
- waitingForReplicaGetDeleted.release();
- }
- return true;
- }
- return false;
- };
-
try {
+ ZkController.testing_beforeRegisterInZk = cd -> {
+ if (cd.getCloudDescriptor() == null) return false;
+ if (replica1.getName().equals(cd.getName()) && collectionName.equals(cd.getCloudDescriptor().getCollectionName())) {
+ if (times.incrementAndGet() > 1) {
+ return false;
+ }
+ log.info("Running delete core {}", cd);
+
+ try {
+
+ CollectionAdminRequest.DeleteReplica deleteReplica = CollectionAdminRequest.deleteReplica(collectionName, replica1.getSlice(), replica1.getName());
+ deleteReplica.setAsyncId("async1");
+ deleteReplica.process(cluster.getSolrClient(), collectionName);
+
+ cluster.getSolrClient().getZkStateReader().waitForState(collectionName, 5, TimeUnit.SECONDS, (liveNodes, collectionState) -> {
+ if (collectionState == null) {
+ return false;
+ }
+ if (collectionState.getReplica(replica1.getName()) != null) {
+ return false;
+ }
+ waitingForReplicaGetDeleted.release();
+ return true;
+ });
+
+ } catch (Exception e) {
+ log.error("", e);
+ fail("Failed to delete replica");
+ } finally {
+ //avoiding deadlock
+ waitingForReplicaGetDeleted.release();
+ }
+ return true;
+ }
+ return false;
+ };
+
replica1Jetty.stop();
- waitForState("Expected replica:"+replica1+" get down", collectionName, (liveNodes, collectionState)
- -> collectionState.getSlice("s1").getReplica(replica1.getName()).getState() == DOWN);
+ waitForState("Expected replica:" + replica1 + " get down", collectionName, (liveNodes, collectionState) -> collectionState.getSlice("s1").getReplica(replica1.getName()).getState() == DOWN);
replica1Jetty.start();
waitingForReplicaGetDeleted.acquire();
} finally {
- ZkContainer.testing_beforeRegisterInZk = null;
+ ZkController.testing_beforeRegisterInZk = null;
}
TimeOut timeOut = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
timeOut.waitFor("Timeout adding replica to shard", () -> {
try {
- CollectionAdminRequest.addReplicaToShard(collectionName, "s1")
- .process(cluster.getSolrClient());
+ CollectionAdminRequest.addReplicaToShard(collectionName, "s1").process(cluster.getSolrClient());
return true;
} catch (Exception e) {
// expected, when the node is not fully started
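For reference, the TimeOut.waitFor call above is the usual retry idiom in these tests. A minimal sketch, not part of the patch, using the same org.apache.solr.util.TimeOut API as the hunk; RetryUntilSuccess and Action are hypothetical names.

import java.util.concurrent.TimeUnit;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.util.TimeOut;

final class RetryUntilSuccess {
  interface Action { void run() throws Exception; }

  // Keep retrying an action that is expected to fail transiently (for example,
  // while a node is still starting) until it succeeds or the timeout expires.
  static void retry(String timeoutMessage, long seconds, Action action) throws Exception {
    TimeOut timeOut = new TimeOut(seconds, TimeUnit.SECONDS, TimeSource.NANO_TIME);
    timeOut.waitFor(timeoutMessage, () -> {
      try {
        action.run();
        return true;   // succeeded, stop waiting
      } catch (Exception e) {
        return false;  // transient failure, poll again until the TimeOut expires
      }
    });
  }
}
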
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
index 7740fa2..fb4810d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteShardTest.java
@@ -16,14 +16,12 @@
*/
package org.apache.solr.cloud;
-import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.SolrTestUtil;
-import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.cloud.DistributedQueue;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.CoreStatus;
@@ -80,7 +78,6 @@
setSliceState(collection, "s1", Slice.State.INACTIVE);
-
cluster.getSolrClient().getZkStateReader().waitForState(collection, 5, TimeUnit.SECONDS, (liveNodes, coll) -> {
if (coll == null) {
return false;
@@ -129,7 +126,7 @@
@Test
// commented 4-Sep-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // added 09-Aug-2018
- public void testDirectoryCleanupAfterDeleteShard() throws InterruptedException, IOException, SolrServerException {
+ public void testDirectoryCleanupAfterDeleteShard() throws Exception {
final String collection = "deleteshard_test";
CollectionAdminRequest.createCollectionWithImplicitRouter(collection, "conf", "a,b,c", 1)
@@ -169,8 +166,15 @@
req = CollectionAdminRequest.deleteShard(collection, "b");
req.setWaitForFinalState(true);
req.process(cluster.getSolrClient());
-
+
assertEquals(1, getCollectionState(collection).getActiveSlices().size());
+
+ if (FileUtils.fileExists(coreStatus.getInstanceDirectory())) {
+ Thread.sleep(250);
+ }
+ if (FileUtils.fileExists(coreStatus.getInstanceDirectory())) {
+ Thread.sleep(250);
+ }
assertFalse("Instance directory still exists", FileUtils.fileExists(coreStatus.getInstanceDirectory()));
assertFalse("Data directory still exists", FileUtils.fileExists(coreStatus.getDataDirectory()));
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteStatusTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteStatusTest.java
index 97a2f72..27087d0 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteStatusTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteStatusTest.java
@@ -28,7 +28,6 @@
import org.apache.solr.client.solrj.response.RequestStatusState;
import org.junit.AfterClass;
import org.junit.BeforeClass;
-import org.junit.Ignore;
import org.junit.Test;
public class DeleteStatusTest extends SolrCloudTestCase {
@@ -111,7 +110,6 @@
}
@Test
- @Ignore // MRM TODO: - once I changed how requests from queue were deleted, this popped up as a race issue
public void testDeleteStatusFlush() throws Exception {
final CloudHttp2SolrClient client = cluster.getSolrClient();
diff --git a/solr/core/src/test/org/apache/solr/cloud/DistribDocExpirationUpdateProcessorTest.java b/solr/core/src/test/org/apache/solr/cloud/DistribDocExpirationUpdateProcessorTest.java
index 7e66e7f..0319e07 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DistribDocExpirationUpdateProcessorTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DistribDocExpirationUpdateProcessorTest.java
@@ -168,7 +168,7 @@
// NOTE: don't assume we can find exactly totalNumDocs right now, some may have already been deleted...
// it should not take long for us to get to the point where all 'should_expire_s:yup' docs are gone
- waitForNoResults(30, params("q","should_expire_s:yup","rows","0","_trace","init_batch_check"));
+ waitForNoResults(10, params("q","should_expire_s:yup","rows","0","_trace","init_batch_check"));
{
// ...*NOW* we can assert that exactly numDocsThatNeverExpire should exist...
@@ -325,7 +325,7 @@
long numFound = req.process(cluster.getSolrClient(), COLLECTION).getResults().getNumFound();
while (0L < numFound && ! timeout.hasTimedOut()) {
- Thread.sleep(Math.max(1, Math.min(5000, timeout.timeLeft(TimeUnit.MILLISECONDS))));
+ Thread.sleep(Math.max(1, Math.min(1500, timeout.timeLeft(TimeUnit.MILLISECONDS))));
numFound = req.process(cluster.getSolrClient(), COLLECTION).getResults().getNumFound();
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/DistributedVersionInfoTest.java b/solr/core/src/test/org/apache/solr/cloud/DistributedVersionInfoTest.java
index 4e32847..4609454 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DistributedVersionInfoTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DistributedVersionInfoTest.java
@@ -80,7 +80,7 @@
final String shardId = "s1";
- CollectionAdminRequest.createCollection(COLLECTION, "conf", 1, 3).waitForFinalState(true)
+ CollectionAdminRequest.createCollection(COLLECTION, "conf", 1, 3)
.process(cluster.getSolrClient());
final ZkStateReader stateReader = cluster.getSolrClient().getZkStateReader();
diff --git a/solr/core/src/test/org/apache/solr/cloud/HttpPartitionOnCommitTest.java b/solr/core/src/test/org/apache/solr/cloud/HttpPartitionOnCommitTest.java
index aae4227..6aa5905 100644
--- a/solr/core/src/test/org/apache/solr/cloud/HttpPartitionOnCommitTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/HttpPartitionOnCommitTest.java
@@ -101,7 +101,7 @@
}
// let's put the leader in its own partition, no replicas can contact it now
- Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
+ Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "s1");
if (log.isInfoEnabled()) {
log.info("Creating partition to leader at {}", leader.getCoreUrl());
}
@@ -109,12 +109,12 @@
leaderProxy.close();
// let's find the leader of shard2 and ask him to commit
- Replica shard2Leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard2");
+ Replica shard2Leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "s2");
sendCommitWithRetry(shard2Leader);
Thread.sleep(sleepMsBeforeHealPartition);
- leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
+ leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "s1");
assertSame("Leader was not active", Replica.State.ACTIVE, leader.getState());
if (log.isInfoEnabled()) {
diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionContextKeyTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionContextKeyTest.java
index 131193c..9e6b005 100644
--- a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionContextKeyTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionContextKeyTest.java
@@ -59,10 +59,10 @@
.setCreateNodeSet("")
.process(cluster.getSolrClient());
CollectionAdminRequest
- .addReplicaToShard("testCollection"+i, "shard1")
+ .addReplicaToShard("testCollection"+i, "s1")
.process(cluster.getSolrClient());
CollectionAdminRequest
- .addReplicaToShard("testCollection"+i, "shard2")
+ .addReplicaToShard("testCollection"+i, "s2")
.process(cluster.getSolrClient());
}
}
@@ -75,8 +75,8 @@
// The test assume that TEST_COLLECTION_1 and TEST_COLLECTION_2 will have identical layout
// ( same replica's name on every shard )
for (int i = 1; i <= 2; i++) {
- String coll1ShardiLeader = clusterState.getCollection(TEST_COLLECTION_1).getLeader("shard"+i).getName();
- String coll2ShardiLeader = clusterState.getCollection(TEST_COLLECTION_2).getLeader("shard"+i).getName();
+ String coll1ShardiLeader = clusterState.getCollection(TEST_COLLECTION_1).getLeader("s"+i).getName();
+ String coll2ShardiLeader = clusterState.getCollection(TEST_COLLECTION_2).getLeader("s"+i).getName();
String assertMss = String.format(Locale.ROOT, "Expect %s and %s each have a replica with same name on shard %s",
coll1ShardiLeader, coll2ShardiLeader, "shard"+i);
assertEquals(
@@ -92,8 +92,8 @@
try (SolrClient shardLeaderClient = new HttpSolrClient.Builder(replica.get("base_url").toString()).build()) {
assertEquals(1L, getElectionNodes(TEST_COLLECTION_1, shard, stateReader.getZkClient()).size());
- List<String> collection2Shard1Nodes = getElectionNodes(TEST_COLLECTION_2, "shard1", stateReader.getZkClient());
- List<String> collection2Shard2Nodes = getElectionNodes(TEST_COLLECTION_2, "shard2", stateReader.getZkClient());
+ List<String> collection2Shard1Nodes = getElectionNodes(TEST_COLLECTION_2, "s1", stateReader.getZkClient());
+ List<String> collection2Shard2Nodes = getElectionNodes(TEST_COLLECTION_2, "s2", stateReader.getZkClient());
CoreAdminRequest.unloadCore(replica.getName(), shardLeaderClient);
// Waiting for leader election being kicked off
long timeout = System.nanoTime() + TimeUnit.NANOSECONDS.convert(60, TimeUnit.SECONDS);
@@ -108,8 +108,8 @@
}
assertTrue(found);
// No leader election was kicked off on testCollection2
- assertThat(collection2Shard1Nodes, CoreMatchers.is(getElectionNodes(TEST_COLLECTION_2, "shard1", stateReader.getZkClient())));
- assertThat(collection2Shard2Nodes, CoreMatchers.is(getElectionNodes(TEST_COLLECTION_2, "shard2", stateReader.getZkClient())));
+ assertThat(collection2Shard1Nodes, CoreMatchers.is(getElectionNodes(TEST_COLLECTION_2, "s1", stateReader.getZkClient())));
+ assertThat(collection2Shard2Nodes, CoreMatchers.is(getElectionNodes(TEST_COLLECTION_2, "s2", stateReader.getZkClient())));
}
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java
index 4c0197e..49c9537 100644
--- a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java
@@ -28,6 +28,7 @@
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.common.cloud.SolrZooKeeper;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.junit.AfterClass;
@@ -105,8 +106,8 @@
JettySolrRunner jetty = getRunner(leader);
ZkController zkController = jetty.getCoreContainer().getZkController();
- zkController.getZkClient().getSolrZooKeeper().closeCnxn();
- cluster.getZkServer().expire(zkController.getZkClient().getSolrZooKeeper().getSessionId());
+ ((SolrZooKeeper)zkController.getZkClient().getConnectionManager().getKeeper()).closeCnxn();
+ cluster.getZkServer().expire(zkController.getZkClient().getConnectionManager().getKeeper().getSessionId());
for (int i = 0; i < 60; i++) { // wait till leader is changed
if (jetty != getRunner(getLeader(collection))) {
diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
index 0b5d63c..f8343c2 100644
--- a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
@@ -36,6 +36,7 @@
import org.apache.solr.common.cloud.OnReconnect;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.common.cloud.SolrZooKeeper;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
@@ -231,11 +232,11 @@
// "http://127.0.0.1/solr/", ZkStateReader.CORE_NAME_PROP, "");
// ZkController zkController = MockSolrSource.makeSimpleMock(null, null, zkClient);
// ElectionContext context = new ShardLeaderElectionContextBase(elector,
-// "shard2", "collection1", "dummynode1", props, zkController);
+// "s2", "collection1", "dummynode1", props, zkController);
// elector.setup(context);
// elector.joinElection(context, false);
// assertEquals("http://127.0.0.1/solr/",
-// getLeaderUrl("collection1", "shard2"));
+// getLeaderUrl("collection1", "s2"));
// }
// MRM TODO:
@@ -515,7 +516,7 @@
int j;
j = random().nextInt(threads.size());
try {
- threads.get(j).es.zkClient.getSolrZooKeeper().closeCnxn();
+ ((SolrZooKeeper)threads.get(j).es.zkClient.getConnectionManager().getKeeper()).closeCnxn();
if (random().nextBoolean()) {
long sessionId = zkClient.getSessionId();
server.expire(sessionId);
@@ -556,7 +557,7 @@
// cleanup any threads still running
for (ClientThread thread : threads) {
- thread.es.zkClient.getSolrZooKeeper().close();
+ // thread.es.zkClient.getSolrZooKeeper().close();
thread.close();
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/MigrateRouteKeyTest.java b/solr/core/src/test/org/apache/solr/cloud/MigrateRouteKeyTest.java
index b68dcf0..610e9e6 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MigrateRouteKeyTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MigrateRouteKeyTest.java
@@ -165,7 +165,7 @@
waitForState("Expected to find routing rule for split key " + splitKey, "sourceCollection", (n, c) -> {
if (c == null)
return false;
- Slice shard = c.getSlice("shard2");
+ Slice shard = c.getSlice("s2");
if (shard == null)
return false;
if (shard.getRoutingRules() == null || shard.getRoutingRules().isEmpty())
@@ -175,7 +175,7 @@
return true;
});
- boolean ruleRemoved = waitForRuleToExpire("sourceCollection", "shard2", splitKey, finishTime);
+ boolean ruleRemoved = waitForRuleToExpire("sourceCollection", "s2", splitKey, finishTime);
assertTrue("Routing rule was not expired", ruleRemoved);
}
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/OutOfBoxZkACLAndCredentialsProvidersTest.java b/solr/core/src/test/org/apache/solr/cloud/OutOfBoxZkACLAndCredentialsProvidersTest.java
index e15fa58..5f1f9fe 100644
--- a/solr/core/src/test/org/apache/solr/cloud/OutOfBoxZkACLAndCredentialsProvidersTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/OutOfBoxZkACLAndCredentialsProvidersTest.java
@@ -133,7 +133,7 @@
protected void assertOpenACLUnsafeAllover(SolrZkClient zkClient, String path, List<String> verifiedList) throws Exception {
- List<ACL> acls = zkClient.getSolrZooKeeper().getACL(path, new Stat());
+ List<ACL> acls = zkClient.getConnectionManager().getKeeper().getACL(path, new Stat());
if (log.isInfoEnabled()) {
log.info("Verifying {}", path);
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/OverriddenZkACLAndCredentialsProvidersTest.java b/solr/core/src/test/org/apache/solr/cloud/OverriddenZkACLAndCredentialsProvidersTest.java
index fb61edf..8b274d5 100644
--- a/solr/core/src/test/org/apache/solr/cloud/OverriddenZkACLAndCredentialsProvidersTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/OverriddenZkACLAndCredentialsProvidersTest.java
@@ -93,7 +93,7 @@
zkClient = new SolrZkClientFactoryUsingCompletelyNewProviders(null, null,
null, null).getSolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT);
- zkClient.getSolrZooKeeper().addAuthInfo("digest", ("connectAndAllACLUsername:connectAndAllACLPassword").getBytes(DATA_ENCODING));
+ zkClient.getConnectionManager().getKeeper().addAuthInfo("digest", ("connectAndAllACLUsername:connectAndAllACLPassword").getBytes(DATA_ENCODING));
zkClient.create("/unprotectedCreateNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
zkClient.makePath("/unprotectedMakePathNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
zkClient.close();
diff --git a/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java b/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java
index 1cdac23..03ed7e5 100644
--- a/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/PeerSyncReplicationTest.java
@@ -50,7 +50,6 @@
import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.util.TimeOut;
import org.junit.AfterClass;
-import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
diff --git a/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java b/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java
index cd593d8..889b0b5 100644
--- a/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/RecoveryAfterSoftCommitTest.java
@@ -25,33 +25,33 @@
import org.apache.solr.common.cloud.Replica;
import org.junit.AfterClass;
import org.junit.BeforeClass;
-import org.junit.Ignore;
import org.junit.Test;
// See SOLR-6640
@SolrTestCaseJ4.SuppressSSL
@LuceneTestCase.Nightly
-@Ignore // MRM TODO: proxy not working right?
public class RecoveryAfterSoftCommitTest extends SolrCloudBridgeTestCase {
private static final int MAX_BUFFERED_DOCS = 2, ULOG_NUM_RECORDS_TO_KEEP = 2;
public RecoveryAfterSoftCommitTest() {
+
+ }
+
+ @BeforeClass
+ public static void beforeTests() {
sliceCount = 1;
numJettys = 2;
replicationFactor = 2;
enableProxy = true;
uploadSelectCollection1Config = true;
+ solrconfigString = "solrconfig.xml";
+ schemaString = "schema.xml";
System.setProperty("solr.tests.maxBufferedDocs", String.valueOf(MAX_BUFFERED_DOCS));
System.setProperty("solr.ulog.numRecordsToKeep", String.valueOf(ULOG_NUM_RECORDS_TO_KEEP));
// avoid creating too many files, see SOLR-7421
System.setProperty("useCompoundFile", "true");
}
- @BeforeClass
- public static void beforeTests() {
-
- }
-
@AfterClass
public static void afterTest() {
diff --git a/solr/core/src/test/org/apache/solr/cloud/ReplicationFactorTest.java b/solr/core/src/test/org/apache/solr/cloud/ReplicationFactorTest.java
index 684df7f..33f2316 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ReplicationFactorTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ReplicationFactorTest.java
@@ -177,11 +177,11 @@
Set<Integer> byIDs;
byIDs = getSomeIds(2);
sendNonDirectDeletesRequestReplicaWithRetry(leader,
- byIDs, calcByIdRf(byIDs, testCollectionName, "shard2"),
+ byIDs, calcByIdRf(byIDs, testCollectionName, "s2"),
getSomeIds(2), 1, testCollectionName);
byIDs = getSomeIds(2);
sendNonDirectDeletesRequestReplicaWithRetry(replicas.get(0), byIDs,
- calcByIdRf(byIDs, testCollectionName, "shard2"),
+ calcByIdRf(byIDs, testCollectionName, "s2"),
getSomeIds(2), 1, testCollectionName);
// heal the partition
getProxyForReplica(shard2Replicas.get(0)).reopen();
diff --git a/solr/core/src/test/org/apache/solr/cloud/SolrCloudBridgeTestCase.java b/solr/core/src/test/org/apache/solr/cloud/SolrCloudBridgeTestCase.java
index f1db9fb..59322d1 100644
--- a/solr/core/src/test/org/apache/solr/cloud/SolrCloudBridgeTestCase.java
+++ b/solr/core/src/test/org/apache/solr/cloud/SolrCloudBridgeTestCase.java
@@ -16,6 +16,7 @@
*/
package org.apache.solr.cloud;
+import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.nio.file.Path;
@@ -184,16 +185,46 @@
}
if (uploadSelectCollection1Config) {
+ String path = null;
try {
- zkClient.create("/configs/_default/solrconfig.snippet.randomindexconfig.xml", TEST_PATH.resolve("collection1").resolve("conf").resolve("solrconfig.snippet.randomindexconfig.xml").toFile(),
- CreateMode.PERSISTENT, true);
- zkClient.create("/configs/_default/enumsConfig.xml", TEST_PATH.resolve("collection1").resolve("conf").resolve("enumsConfig.xml").toFile(), CreateMode.PERSISTENT, true);
- zkClient.create("/configs/_default/currency.xml", TEST_PATH.resolve("collection1").resolve("conf").resolve("currency.xml").toFile(), CreateMode.PERSISTENT, true);
- zkClient.create("/configs/_default/old_synonyms.txt", TEST_PATH.resolve("collection1").resolve("conf").resolve("old_synonyms.txt").toFile(), CreateMode.PERSISTENT, true);
- zkClient.create("/configs/_default/open-exchange-rates.json", TEST_PATH.resolve("collection1").resolve("conf").resolve("open-exchange-rates.json").toFile(), CreateMode.PERSISTENT, true);
- zkClient.create("/configs/_default/mapping-ISOLatin1Accent.txt", TEST_PATH.resolve("collection1").resolve("conf").resolve("mapping-ISOLatin1Accent.txt").toFile(), CreateMode.PERSISTENT, true);
+ path = "/configs/_default/solrconfig.snippet.randomindexconfig.xml";
+ zkClient.create(path, TEST_PATH.resolve("collection1").resolve("conf").resolve(new File(path).getName()).toFile(), CreateMode.PERSISTENT, true);
} catch (KeeperException.NodeExistsException exists) {
- log.info("extra collection config files already exist in zk");
+ log.info("extra collection config file already exist in zk {}", path);
+ }
+
+ try {
+ path = "/configs/_default/enumsConfig.xml";
+ zkClient.create(path, TEST_PATH.resolve("collection1").resolve("conf").resolve(new File(path).getName()).toFile(), CreateMode.PERSISTENT, true);
+ } catch (KeeperException.NodeExistsException exists) {
+ log.info("extra collection config file already exist in zk {}", path);
+ }
+
+ try {
+ path = "/configs/_default/currency.xml";
+ zkClient.create(path, TEST_PATH.resolve("collection1").resolve("conf").resolve(new File(path).getName()).toFile(), CreateMode.PERSISTENT, true);
+ } catch (KeeperException.NodeExistsException exists) {
+ log.info("extra collection config file already exist in zk {}", path);
+ }
+
+ try {
+ path = "/configs/_default/old_synonyms.txt";
+ zkClient.create(path, TEST_PATH.resolve("collection1").resolve("conf").resolve(new File(path).getName()).toFile(), CreateMode.PERSISTENT, true);
+ } catch (KeeperException.NodeExistsException exists) {
+ log.info("extra collection config file already exist in zk {}", path);
+ }
+
+ try {
+ path = "/configs/_default/open-exchange-rates.json";
+ zkClient.create(path, TEST_PATH.resolve("collection1").resolve("conf").resolve(new File(path).getName()).toFile(), CreateMode.PERSISTENT, true);
+ } catch (KeeperException.NodeExistsException exists) {
+ log.info("extra collection config file already exist in zk {}", path);
+ }
+ try {
+ path = "/configs/_default/mapping-ISOLatin1Accent.txt";
+ zkClient.create(path, TEST_PATH.resolve("collection1").resolve("conf").resolve(new File(path).getName()).toFile(), CreateMode.PERSISTENT, true);
+ } catch (KeeperException.NodeExistsException exists) {
+ log.info("extra collection config file already exist in zk {}", path);
}
}
@@ -428,24 +459,8 @@
return Integer.parseInt(tmp);
}
- protected Replica getShardLeader(String testCollectionName, String shardId, int timeoutSecs) throws Exception {
- Replica leader = null;
- long timeout = System.nanoTime() + TimeUnit.NANOSECONDS.convert(timeoutSecs, TimeUnit.SECONDS);
- while (System.nanoTime() < timeout) {
- Replica tmp = null;
- try {
- tmp = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, shardId);
- } catch (Exception exc) {}
- if (tmp != null && State.ACTIVE == tmp.getState()) {
- leader = tmp;
- break;
- }
- Thread.sleep(250);
- }
- assertNotNull("Could not find active leader for " + shardId + " of " +
- testCollectionName + " after "+timeoutSecs+" secs;", leader);
-
- return leader;
+ protected Replica getShardLeader(String testCollectionName, String shardId, int timeoutms) throws Exception {
+ return cloudClient.getZkStateReader().getLeaderRetry(cluster.getSolrClient().getHttpClient(), testCollectionName, shardId, timeoutms, true);
}
protected JettySolrRunner getJettyOnPort(int port) {
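For reference, the six near-identical upload blocks above can also be expressed as a loop over the file names. A minimal sketch, not part of the patch, assuming the SolrZkClient.create(path, File, CreateMode, retryOnConnLoss) overload used in the hunk; ConfigExtras and uploadDefaultConfigExtras are hypothetical names.

import java.nio.file.Path;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;

final class ConfigExtras {
  static void uploadDefaultConfigExtras(SolrZkClient zkClient, Path confDir) throws Exception {
    String[] names = {
        "solrconfig.snippet.randomindexconfig.xml", "enumsConfig.xml", "currency.xml",
        "old_synonyms.txt", "open-exchange-rates.json", "mapping-ISOLatin1Accent.txt"};
    for (String name : names) {
      String path = "/configs/_default/" + name;
      try {
        zkClient.create(path, confDir.resolve(name).toFile(), CreateMode.PERSISTENT, true);
      } catch (KeeperException.NodeExistsException exists) {
        // the file was uploaded by an earlier run; nothing to do
      }
    }
  }
}
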
diff --git a/solr/core/src/test/org/apache/solr/cloud/SplitShardTest.java b/solr/core/src/test/org/apache/solr/cloud/SplitShardTest.java
index a5fb602..abd1569 100644
--- a/solr/core/src/test/org/apache/solr/cloud/SplitShardTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/SplitShardTest.java
@@ -84,7 +84,7 @@
}
@Test
- public void doTest() throws IOException, SolrServerException {
+ public void doTest() throws Exception {
CollectionAdminRequest
.createCollection(COLLECTION_NAME, "conf", 2, 1)
.setMaxShardsPerNode(100)
diff --git a/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java b/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java
index 2a1d35a..dc48ab4 100644
--- a/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java
@@ -25,6 +25,7 @@
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.params.CollectionParams.CollectionAction;
@@ -37,6 +38,7 @@
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;
+import java.util.concurrent.TimeUnit;
/**
* Test sync phase that occurs when Leader goes down and a new Leader is
@@ -73,7 +75,7 @@
handle.clear();
handle.put("timestamp", SKIPVAL);
- // waitForThingsToLevelOut(30, TimeUnit.SECONDS);
+ cluster.waitForActiveCollection(cluster.getSolrClient().getHttpClient(), COLLECTION, 5, TimeUnit.SECONDS, false, 1, numJettys, true, true);
List<JettySolrRunner> skipServers = new ArrayList<>();
int docId = 0;
@@ -126,6 +128,7 @@
// kill the leader - new leader could have all the docs or be missing one
JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(getShardLeader(COLLECTION, "s1", 10000)));
+ log.info("Stopping leader jetty {}", leaderJetty.getBaseUrl());
skipServers = getRandomOtherJetty(leaderJetty, null); // but not the leader
@@ -144,7 +147,11 @@
int cnt = 0;
while (deadJetty == leaderJetty) {
// updateMappingsFromZk(this.jettys, this.clients);
- leaderJetty = getJettyOnPort(getReplicaPort(getShardLeader(COLLECTION, "s1", 5)));
+ try {
+ leaderJetty = getJettyOnPort(getReplicaPort(getShardLeader(COLLECTION, "s1", 1000)));
+ } catch (SolrException e) {
+ log.info("did not get leader", e);
+ }
if (deadJetty == leaderJetty) {
Thread.sleep(500);
}
@@ -157,7 +164,7 @@
deadJetty.start(); // he is not the leader anymore
log.info("numJettys=" + numJettys);
- cluster.waitForActiveCollection(COLLECTION, 1, numJettys);
+ cluster.waitForActiveCollection(cluster.getSolrClient().getHttpClient(), COLLECTION, 10, TimeUnit.SECONDS, false, 1, numJettys, true, true);
skipServers = getRandomOtherJetty(leaderJetty, deadJetty);
skipServers.addAll(getRandomOtherJetty(leaderJetty, deadJetty));
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudDeleteByQuery.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudDeleteByQuery.java
index bad1713..71786e1 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudDeleteByQuery.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudDeleteByQuery.java
@@ -35,6 +35,7 @@
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
+import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
@@ -86,7 +87,8 @@
@AfterClass
private static void afterClass() throws Exception {
if (null != CLOUD_CLIENT) {
- CLOUD_CLIENT.close();
+ // WE DON'T OWN CLOUD_CLIENT!
+ // CLOUD_CLIENT.close();
CLOUD_CLIENT = null;
}
if (null != S_ONE_LEADER_CLIENT) {
@@ -212,8 +214,8 @@
}
}
- @Before
- private void clearCloudCollection() throws Exception {
+ @After
+ public void clearCloudCollection() throws Exception {
assertEquals(0, CLOUD_CLIENT.deleteByQuery("*:*").getStatus());
assertEquals(0, CLOUD_CLIENT.commit().getStatus());
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
index 9f04280..cf7f467 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
@@ -149,9 +149,9 @@
if (resp.getResults().getNumFound() >= 4) {
break;
}
+ Thread.sleep(100);
}
- resp = cloudClient.query(COLLECTION, params);
assertEquals(4, resp.getResults().getNumFound());
// Make sure all leader nodes recover from tlog
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery2.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery2.java
index 2e6678e..2063b47 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery2.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery2.java
@@ -25,6 +25,7 @@
import org.apache.solr.SolrTestUtil;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.BaseHttpSolrClient;
import org.apache.solr.client.solrj.impl.Http2SolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
@@ -46,12 +47,13 @@
useFactory(null);
System.setProperty("solr.ulog.numRecordsToKeep", "1000");
- configureCluster(2)
+ configureCluster(3)
.addConfig("config", SolrTestUtil.TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.configure();
+ // 2 replicas will not ensure we don't lose an update here, so 3
CollectionAdminRequest
- .createCollection(COLLECTION, "config", 1,2)
+ .createCollection(COLLECTION, "config", 1,3)
.setMaxShardsPerNode(100)
.waitForFinalState(true)
.process(cluster.getSolrClient());
@@ -66,26 +68,40 @@
public void test() throws Exception {
JettySolrRunner node1 = cluster.getJettySolrRunner(0);
JettySolrRunner node2 = cluster.getJettySolrRunner(1);
- try (Http2SolrClient client1 = SolrTestCaseJ4.getHttpSolrClient(node1.getBaseUrl().toString())) {
+
+ try (Http2SolrClient client1 = SolrTestCaseJ4.getHttpSolrClient(node1.getBaseUrl())) {
node2.stop();
- cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 1);
- cluster.waitForActiveCollection(COLLECTION, 1, 1, true);
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 2);
+ Thread.sleep(250);
+ cluster.waitForActiveCollection(cluster.getSolrClient().getHttpClient(), COLLECTION, 5, TimeUnit.SECONDS, false, 1, 2, true, true);
UpdateRequest req = new UpdateRequest();
for (int i = 0; i < 100; i++) {
req = req.add("id", i+"", "num", i+"");
}
- req.commit(client1, COLLECTION);
+
+ try {
+ req.commit(client1, COLLECTION);
+ } catch (BaseHttpSolrClient.RemoteSolrException e) {
+ Thread.sleep(250);
+ try {
+ req.commit(client1, COLLECTION);
+ } catch (BaseHttpSolrClient.RemoteSolrException e2) {
+ Thread.sleep(500);
+ req.commit(client1, COLLECTION);
+ }
+ }
node2.start();
- cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 2);
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 3);
+ Thread.sleep(250);
+ log.info("wait for active collection before query");
+ cluster.waitForActiveCollection(cluster.getSolrClient().getHttpClient(), COLLECTION, 10, TimeUnit.SECONDS, false, 1, 3, true, true);
- cluster.waitForActiveCollection(COLLECTION, 1, 2, true);
-
- try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node2.getBaseUrl().toString())) {
+ try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node2.getBaseUrl())) {
long numFound = client.query(COLLECTION, new SolrQuery("q","*:*", "distrib", "false")).getResults().getNumFound();
assertEquals(100, numFound);
}
@@ -95,24 +111,50 @@
new UpdateRequest().add("id", "1", "num", "10")
.commit(client1, COLLECTION);
- Object v = client1.query(COLLECTION, new SolrQuery("q","id:1", "distrib", "true")).getResults().get(0).get("num");
+ // can be stale (eventually consistent) but should catch up
+ for (int i = 0; i < 30; i++) {
+ try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node2.getBaseUrl())) {
+ Object v = client1.query(COLLECTION, new SolrQuery("q","id:1", "distrib", "true")).getResults().get(0).get("num");
+ try {
+ assertEquals("10 i="+ i, "10", v.toString());
+ break;
+ } catch (AssertionError error) {
+ if (i == 29) {
+ throw error;
+ }
+ Thread.sleep(100);
+ }
+ }
+ }
+
+ Object v = client1.query(COLLECTION, new SolrQuery("q", "id:1", "distrib", "false")).getResults().get(0).get("num");
assertEquals("10", v.toString());
- v = client1.query(COLLECTION, new SolrQuery("q","id:1", "distrib", "false")).getResults().get(0).get("num");
- assertEquals("10", v.toString());
-
-
- try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node2.getBaseUrl().toString())) {
- v = client.query(COLLECTION, new SolrQuery("q","id:1", "distrib", "true")).getResults().get(0).get("num");
- assertEquals("10", v.toString());
+ // can be stale (eventually consistent) but should catch up
+ for (int i = 0; i < 30; i ++) {
+ try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node2.getBaseUrl())) {
+ v = client.query(COLLECTION, new SolrQuery("q", "id:1", "distrib", "true")).getResults().get(0).get("num");
+ try {
+ assertEquals("node requested=" + node2.getBaseUrl() + " 10 i="+ i, "10", v.toString());
+ break;
+ } catch (AssertionError error) {
+ if (i == 29) {
+ throw error;
+ }
+ Thread.sleep(100);
+ }
+ }
}
//
node2.stop();
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 2);
Thread.sleep(250);
+ log.info("wait for active collection before query");
+ cluster.waitForActiveCollection(cluster.getSolrClient().getHttpClient(), COLLECTION, 10, TimeUnit.SECONDS, false, 1, 2, true, true);
new UpdateRequest().add("id", "1", "num", "20")
.commit(client1, COLLECTION);
@@ -122,9 +164,10 @@
node2.start();
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 3);
Thread.sleep(250);
-
- cluster.waitForActiveCollection(COLLECTION, 1, 2);
+ log.info("wait for active collection before query");
+ cluster.waitForActiveCollection(cluster.getSolrClient().getHttpClient(), COLLECTION, 10, TimeUnit.SECONDS, false, 1, 3, true, true);
try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node2.getBaseUrl().toString())) {
v = client.query(COLLECTION, new SolrQuery("q","id:1", "distrib", "false")).getResults().get(0).get("num");
@@ -134,32 +177,56 @@
node2.stop();
- Thread.sleep(250);
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 2);
+
+ log.info("wait for active collection before query");
+ cluster.waitForActiveCollection(cluster.getSolrClient().getHttpClient(), COLLECTION, 10, TimeUnit.SECONDS, false, 1, 2, true, true);
new UpdateRequest().add("id", "1", "num", "30")
.commit(client1, COLLECTION);
- v = client1.query(COLLECTION, new SolrQuery("q","id:1", "distrib", "false")).getResults().get(0).get("num");
- SolrTestCaseJ4.assertEquals("30", v.toString());
+
+
+ for (int i = 0; i < 30; i ++) {
+ try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node2.getBaseUrl())) {
+ v = client1.query(COLLECTION, new SolrQuery("q","id:1", "distrib", "false")).getResults().get(0).get("num");
+ try {
+ SolrTestCaseJ4.assertEquals("30", v.toString());
+ break;
+ } catch (AssertionError error) {
+ if (i == 29) {
+ throw error;
+ }
+ Thread.sleep(100);
+ }
+ }
+ }
+
node2.start();
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 3);
+
Thread.sleep(250);
- cluster.waitForActiveCollection(COLLECTION, 1, 2);
+ cluster.waitForActiveCollection(cluster.getSolrClient().getHttpClient(), COLLECTION, 5, TimeUnit.SECONDS, false, 1, 3, true, true);
- try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node2.getBaseUrl().toString())) {
+ try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node2.getBaseUrl())) {
v = client.query(COLLECTION, new SolrQuery("q","id:1", "distrib", "false")).getResults().get(0).get("num");
assertEquals("30", v.toString());
}
v = client1.query(COLLECTION, new SolrQuery("q","id:1", "distrib", "false")).getResults().get(0).get("num");
assertEquals("30", v.toString());
}
- Replica oldLeader = cluster.getSolrClient().getZkStateReader().getLeaderRetry(COLLECTION,"s1");
-
+ Replica oldLeader = cluster.getSolrClient().getZkStateReader().getLeaderRetry(cluster.getSolrClient().getHttpClient(), COLLECTION,"s1", 5000, true);
node1.stop();
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 2);
+ Thread.sleep(250);
+ cluster.waitForActiveCollection(cluster.getSolrClient().getHttpClient(), COLLECTION, 5, TimeUnit.SECONDS, false, 1, 2, true, true);
+
+ Replica newLeader = cluster.getSolrClient().getZkStateReader().getLeaderRetry(cluster.getSolrClient().getHttpClient(), COLLECTION,"s1", 5000, true);
if (oldLeader.getNodeName().equals(node1.getNodeName())) {
waitForState("", COLLECTION, (liveNodes, collectionState) -> {
@@ -170,15 +237,17 @@
node1.start();
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 3);
Thread.sleep(250);
+ cluster.getSolrClient().getZkStateReader().getZkClient().printLayout();
- cluster.waitForActiveCollection(COLLECTION, 1, 2);
+ cluster.waitForActiveCollection(cluster.getSolrClient().getHttpClient(), COLLECTION, 10, TimeUnit.SECONDS, false, 1, 3, true, true);
- try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node1.getBaseUrl().toString())) {
+ try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node1.getBaseUrl())) {
Object v = client.query(COLLECTION, new SolrQuery("q","id:1", "distrib", "false")).getResults().get(0).get("num");
assertEquals("30", v.toString());
}
- try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node2.getBaseUrl().toString())) {
+ try (Http2SolrClient client = SolrTestCaseJ4.getHttpSolrClient(node2.getBaseUrl())) {
Object v = client.query(COLLECTION, new SolrQuery("q","id:1", "distrib", "false")).getResults().get(0).get("num");
assertEquals("30", v.toString());
}
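For reference, the repeated 30-iteration loops above share one idiom: re-run an assertion against an eventually consistent replica until it passes or the retries run out. A minimal sketch, not part of the patch; Eventually, Check, and retryAssert are hypothetical names.

final class Eventually {
  interface Check { void run() throws Exception; }

  // Re-run an assertion for up to ~3 seconds (30 x 100ms); if it still fails on
  // the last attempt, let that AssertionError propagate.
  static void retryAssert(Check check) throws Exception {
    for (int i = 0; i < 30; i++) {
      try {
        check.run();
        return;            // assertion passed, stop polling
      } catch (AssertionError error) {
        if (i == 29) {
          throw error;     // out of retries, surface the last failure
        }
        Thread.sleep(100); // the replica may still be catching up
      }
    }
  }
}
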
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestConfigSetsAPI.java b/solr/core/src/test/org/apache/solr/cloud/TestConfigSetsAPI.java
index 4fc2202..5731d75 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestConfigSetsAPI.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestConfigSetsAPI.java
@@ -284,43 +284,28 @@
@Test
public void testUploadErrors() throws Exception {
- final SolrClient solrClient = getHttpSolrClient(solrCluster.getJettySolrRunners().get(0).getBaseUrl().toString());
-
ByteBuffer emptyData = ByteBuffer.allocate(0);
// Checking error when no configuration name is specified in request
- Map map = postDataAndGetResponse(solrCluster.getSolrClient(),
- solrCluster.getJettySolrRunners().get(0).getBaseUrl().toString()
- + "/admin/configs?action=UPLOAD", emptyData, null, null);
+ Map map = postDataAndGetResponse(solrCluster.getSolrClient(), solrCluster.getJettySolrRunners().get(0).getBaseUrl().toString() + "/admin/configs?action=UPLOAD", emptyData, null, null);
assertNotNull(map);
- long statusCode = (long) getObjectByPath(map, false,
- Arrays.asList("responseHeader", "status"));
+ long statusCode = (long) getObjectByPath(map, false, Arrays.asList("responseHeader", "status"));
assertEquals(400l, statusCode);
SolrZkClient zkClient = zkClient();
// Create dummy config files in zookeeper
zkClient.mkdir("/configs/myconf");
- zkClient.create("/configs/myconf/firstDummyFile",
- "first dummy content".getBytes(StandardCharsets.UTF_8), CreateMode.PERSISTENT, true);
- zkClient.create("/configs/myconf/anotherDummyFile",
- "second dummy content".getBytes(StandardCharsets.UTF_8), CreateMode.PERSISTENT, true);
+ zkClient.create("/configs/myconf/firstDummyFile", "first dummy content".getBytes(StandardCharsets.UTF_8), CreateMode.PERSISTENT, true);
+ zkClient.create("/configs/myconf/anotherDummyFile", "second dummy content".getBytes(StandardCharsets.UTF_8), CreateMode.PERSISTENT, true);
// Checking error when configuration name specified already exists
- map = postDataAndGetResponse(solrCluster.getSolrClient(),
- solrCluster.getJettySolrRunners().get(0).getBaseUrl().toString()
- + "/admin/configs?action=UPLOAD&name=myconf", emptyData, null, null);
+ map = postDataAndGetResponse(solrCluster.getSolrClient(), solrCluster.getJettySolrRunners().get(0).getBaseUrl().toString() + "/admin/configs?action=UPLOAD&name=myconf", emptyData, null, null);
assertNotNull(map);
- statusCode = (long) getObjectByPath(map, false,
- Arrays.asList("responseHeader", "status"));
+ statusCode = (long) getObjectByPath(map, false, Arrays.asList("responseHeader", "status"));
assertEquals(400l, statusCode);
- assertTrue("Expected file doesnt exist in zk. It's possibly overwritten",
- zkClient.exists("/configs/myconf/firstDummyFile"));
- assertTrue("Expected file doesnt exist in zk. It's possibly overwritten",
- zkClient.exists("/configs/myconf/anotherDummyFile"));
-
- zkClient.close();
- solrClient.close();
+ assertTrue("Expected file doesnt exist in zk. It's possibly overwritten", zkClient.exists("/configs/myconf/firstDummyFile"));
+ assertTrue("Expected file doesnt exist in zk. It's possibly overwritten", zkClient.exists("/configs/myconf/anotherDummyFile"));
}
@Test
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestDistribDocBasedVersion.java b/solr/core/src/test/org/apache/solr/cloud/TestDistribDocBasedVersion.java
index ef2e6c0..2087f54 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestDistribDocBasedVersion.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestDistribDocBasedVersion.java
@@ -41,8 +41,8 @@
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
- String bucket1 = "shard1"; // shard1: top bits:10 80000000:ffffffff
- String bucket2 = "shard2"; // shard2: top bits:00 00000000:7fffffff
+ String bucket1 = "s1"; // shard1: top bits:10 80000000:ffffffff
+ String bucket2 = "s2"; // shard2: top bits:00 00000000:7fffffff
private static String vfield = "my_version_l";
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java b/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java
index a699fd4..1b007e1 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java
@@ -25,6 +25,7 @@
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.ShardParams;
+import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
@@ -32,6 +33,7 @@
import static org.hamcrest.CoreMatchers.is;
import java.lang.invoke.MethodHandles;
+import java.util.concurrent.TimeUnit;
/**
* Test which asserts that shards.tolerant=true works even if one shard is down
@@ -43,15 +45,22 @@
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@BeforeClass
- public static void setupCluster() throws Exception {
+ public static void beforeTestDownShardTolerantSearch() throws Exception {
configureCluster(2).addConfig("conf", SolrTestUtil.configset("cloud-minimal")).configure();
+ CollectionAdminRequest.createCollection("tolerant", "conf", 2, 1).waitForFinalState(true).process(cluster.getSolrClient());
+ cluster.getSolrClient().getZkStateReader().waitForActiveCollection(cluster.getSolrClient().getHttpClient(), "tolerant", 5, TimeUnit.SECONDS, false, 2, 2, true, true);
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 2);
+ }
+
+ @AfterClass
+ public static void afterTestDownShardTolerantSearch() throws Exception {
+ cluster.deleteAllCollections();
+ shutdownCluster();
}
@Test
public void searchingShouldFailWithoutTolerantSearchSetToTrue() throws Exception {
- CollectionAdminRequest.createCollection("tolerant", "conf", 2, 1).waitForFinalState(true).process(cluster.getSolrClient());
-
UpdateRequest update = new UpdateRequest();
for (int i = 0; i < 100; i++) {
update.add("id", Integer.toString(i));
@@ -66,17 +75,21 @@
cluster.waitForJettyToStop(stoppedServer);
- try (SolrClient client = cluster.buildSolrClient()) {
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 1);
- response = client.query("tolerant", new SolrQuery("*:*").setRows(1).setParam(ShardParams.SHARDS_TOLERANT, true));
+ cluster.getSolrClient().getZkStateReader().waitForActiveCollection("tolerant", 5, TimeUnit.SECONDS, false, 2, 1, true, false);
- assertThat(response.getStatus(), is(0));
- assertTrue(response.getResults().getNumFound() > 0);
+ SolrClient client = cluster.getSolrClient();
- Exception e = SolrTestCaseUtil.expectThrows(Exception.class, "Request should have failed because we killed shard1 jetty",
- () -> cluster.getSolrClient().query("tolerant", new SolrQuery("*:*").setRows(1).setParam(ShardParams.SHARDS_TOLERANT, false)));
+ response = client.query("tolerant", new SolrQuery("*:*").setRows(1).setParam(ShardParams.SHARDS_TOLERANT, true));
- assertNotNull(e);
- }
+ assertThat(response.getStatus(), is(0));
+ assertTrue(response.getResults().getNumFound() > 0);
+
+ Exception e = SolrTestCaseUtil.expectThrows(Exception.class, "Request should have failed because we killed shard1 jetty",
+ () -> cluster.getSolrClient().query("tolerant", new SolrQuery("*:*").setRows(1).setParam(ShardParams.SHARDS_TOLERANT, false)));
+
+ assertNotNull(e);
+
}
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestDynamicFieldNamesIndexCorrectly.java b/solr/core/src/test/org/apache/solr/cloud/TestDynamicFieldNamesIndexCorrectly.java
index e3ee69a..01d207a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestDynamicFieldNamesIndexCorrectly.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestDynamicFieldNamesIndexCorrectly.java
@@ -26,7 +26,6 @@
import org.apache.solr.common.SolrInputDocument;
import org.hamcrest.core.IsCollectionContaining;
import org.hamcrest.core.IsEqual;
-import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -67,23 +66,19 @@
}
void populateIndex(int numRuns) throws IOException, SolrServerException {
- try {
- for (int i = 0; i < numRuns; i++) {
- log.debug("Iteration number: {}", i);
- cloudClient.deleteByQuery(COLLECTION, "*:*");
- cloudClient.commit(COLLECTION);
+ for (int i = 0; i < numRuns; i++) {
+ log.debug("Iteration number: {}", i);
+ cloudClient.deleteByQuery(COLLECTION, "*:*");
+ cloudClient.commit(COLLECTION);
- final Collection<SolrInputDocument> solrDocs = generateRandomizedFieldDocuments();
- addToSolr(solrDocs);
+ final Collection<SolrInputDocument> solrDocs = generateRandomizedFieldDocuments();
+ addToSolr(solrDocs);
- final SolrQuery solrQuery = new SolrQuery("*:*");
- solrQuery.setRows(solrDocs.size());
- final SolrDocumentList resultDocs = getSolrResponse(solrQuery, COLLECTION);
- log.debug("{}", resultDocs);
- assertThatDocsHaveCorrectFields(solrDocs, resultDocs);
- }
- } finally {
- cloudClient.close();
+ final SolrQuery solrQuery = new SolrQuery("*:*");
+ solrQuery.setRows(solrDocs.size());
+ final SolrDocumentList resultDocs = getSolrResponse(solrQuery, COLLECTION);
+ log.debug("{}", resultDocs);
+ assertThatDocsHaveCorrectFields(solrDocs, resultDocs);
}
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestHashPartitioner.java b/solr/core/src/test/org/apache/solr/cloud/TestHashPartitioner.java
index 5ddeb39..278adb9 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestHashPartitioner.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestHashPartitioner.java
@@ -253,7 +253,7 @@
// shard3: 00
// shard4: 01
- String[] highBitsToShard = {"shard3","shard4","shard1","shard2"};
+ String[] highBitsToShard = {"shard3","shard4","s1","s2"};
for (int i = 0; i<26; i++) {
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java b/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java
index 5e67079..2ceb521 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java
@@ -60,7 +60,7 @@
public void run() {
long timeout = System.nanoTime() + TimeUnit.NANOSECONDS.convert( TEST_NIGHTLY ? 10 : 5, TimeUnit.SECONDS);
while (System.nanoTime() < timeout) {
- long sessionId = zkController.getZkClient().getSolrZooKeeper().getSessionId();
+ long sessionId = zkController.getZkClient().getConnectionManager().getKeeper().getSessionId();
server.expire(sessionId);
try {
Thread.sleep(250);
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java b/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
index deed435..c00eee9 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
@@ -408,6 +408,7 @@
} else {
leaderJetty = cluster.getReplicaJetty(s.getLeader());
leaderJetty.stop();
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 1);
waitForState("Leader replica not removed", collectionName, clusterShape(1, 1));
// Wait for cluster state to be updated
waitForState("Replica state not updated in cluster state",
@@ -454,8 +455,11 @@
CollectionAdminRequest.addReplicaToShard(collectionName, "s1", Replica.Type.NRT).waitForFinalState(true).process(cluster.getSolrClient());
} else {
leaderJetty.start();
+
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 2);
}
+
SolrTestCaseJ4.unIgnoreException("No registered leader was found"); // Should have a leader from now on
// Validate that the new nrt replica is the leader now
@@ -478,7 +482,11 @@
// add docs agin
cluster.getSolrClient().add(collectionName, new SolrInputDocument("id", "2", "foo", "zoo"));
s = docCollection.getSlices().iterator().next();
- try (Http2SolrClient leaderClient = SolrTestCaseJ4.getHttpSolrClient(s.getLeader().getCoreUrl())) {
+
+
+ leader = cluster.getSolrClient().getZkStateReader().getLeaderRetry(collectionName, s.getName());
+
+ try (Http2SolrClient leaderClient = SolrTestCaseJ4.getHttpSolrClient(leader.getCoreUrl())) {
leaderClient.commit();
SolrDocumentList results = leaderClient.query(new SolrQuery("*:*")).getResults();
assertEquals(results.toString(), 2, results.getNumFound());
@@ -500,16 +508,19 @@
JettySolrRunner pullReplicaJetty = cluster.getReplicaJetty(docCollection.getSlice("s1").getReplicas(EnumSet.of(Replica.Type.PULL)).get(0));
pullReplicaJetty.stop();
- waitForState("Replica not removed", collectionName, activeReplicaCount(1, 0, 0));
- // Also wait for the replica to be placed in state="down"
- waitForState("Didn't not live state", collectionName, notLive(Replica.Type.PULL));
+
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 1);
+
+ cluster.getSolrClient().getZkStateReader().waitForActiveCollection(cluster.getSolrClient().getHttpClient(), collectionName, 5, TimeUnit.SECONDS, false, 1, 1, true, true);
cluster.getSolrClient().add(collectionName, new SolrInputDocument("id", "2", "foo", "bar"));
cluster.getSolrClient().commit(collectionName);
waitForNumDocsInAllActiveReplicas(2);
pullReplicaJetty.start();
- waitForState("Replica not added", collectionName, activeReplicaCount(1, 0, 1));
+
+ cluster.getSolrClient().getZkStateReader().waitForLiveNodes(5, TimeUnit.SECONDS, (newLiveNodes) -> newLiveNodes.size() == 2);
+ cluster.getSolrClient().getZkStateReader().waitForActiveCollection(cluster.getSolrClient().getHttpClient(), collectionName, 5, TimeUnit.SECONDS, false, 1, 2, true, true);
waitForNumDocsInAllActiveReplicas(2);
}
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java b/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java
index 6ac8c58..1975018 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java
@@ -79,7 +79,6 @@
CollectionAdminRequest.createCollection(collectionName, configName,
NUM_SHARDS, TEST_NIGHTLY ? REPLICATION_FACTOR : 1)
.setMaxShardsPerNode(10)
- .waitForFinalState(true)
.setProperties(collectionProperties);
assertTrue( cmd.process(cloudSolrClient).isSuccess() );
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
index e30af97..b42f975 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
@@ -103,7 +103,6 @@
CollectionAdminRequest.createCollection(COLLECTION_NAME, configName, numShards, repFactor)
.setProperties(collectionProperties)
.setMaxShardsPerNode(10)
- .waitForFinalState(true)
.process(CLOUD_CLIENT);
if (NODE_CLIENTS != null) {
diff --git a/solr/core/src/test/org/apache/solr/cloud/VMParamsZkACLAndCredentialsProvidersTest.java b/solr/core/src/test/org/apache/solr/cloud/VMParamsZkACLAndCredentialsProvidersTest.java
index bdf3549..6dff5d5 100644
--- a/solr/core/src/test/org/apache/solr/cloud/VMParamsZkACLAndCredentialsProvidersTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/VMParamsZkACLAndCredentialsProvidersTest.java
@@ -93,7 +93,7 @@
zkClient.start();
// Currently no credentials on ZK connection, because those same VM-params are used for adding ACLs, and here we want
// no (or completely open) ACLs added. Therefore hack your way into being authorized for creating anyway
- zkClient.getSolrZooKeeper().addAuthInfo("digest", ("connectAndAllACLUsername:connectAndAllACLPassword")
+ zkClient.getConnectionManager().getKeeper().addAuthInfo("digest", ("connectAndAllACLUsername:connectAndAllACLPassword")
.getBytes(StandardCharsets.UTF_8));
zkClient.create("/unprotectedCreateNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
zkClient.makePath("/unprotectedMakePathNode", "content".getBytes(DATA_ENCODING), CreateMode.PERSISTENT, false);
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIAsyncDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIAsyncDistributedZkTest.java
index 6691dc6..47f6a98 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIAsyncDistributedZkTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIAsyncDistributedZkTest.java
@@ -148,7 +148,6 @@
assertEquals(1, client.query(collection, query).getResults().getNumFound());
CollectionAdminRequest.DeleteShard req = CollectionAdminRequest.deleteShard(collection, "s2");
- req.setWaitForFinalState(true);
state = req.processAndWait(client, MAX_TIMEOUT_SECONDS);
assertSame("DeleteShard did not complete", RequestStatusState.COMPLETED, state);
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistClusterPerZkTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistClusterPerZkTest.java
index 8a0873a..20cab54 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistClusterPerZkTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistClusterPerZkTest.java
@@ -75,6 +75,8 @@
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicReference;
/**
* Tests the Cloud Collections API.
@@ -161,7 +163,7 @@
@Test
@LuceneTestCase.Nightly // needs 4 nodes
public void testCoresAreDistributedAcrossNodes() throws Exception {
- CollectionAdminRequest.createCollection("nodes_used_collection", "conf", 2, 2).waitForFinalState(true)
+ CollectionAdminRequest.createCollection("nodes_used_collection", "conf", 2, 2)
.process(cluster.getSolrClient());
Set<String> liveNodes = cluster.getSolrClient().getZkStateReader().getLiveNodes();
@@ -207,7 +209,6 @@
CollectionAdminRequest.createCollection("nodeset_collection", "conf", 2, 1)
.setCreateNodeSet(baseUrls.get(0) + "," + baseUrls.get(1))
- .waitForFinalState(true)
.process(cluster.getSolrClient());
DocCollection collectionState = getCollectionState("nodeset_collection");
@@ -422,7 +423,6 @@
CollectionAdminRequest.createCollection(collectionName, "conf", 2, 2)
.setMaxShardsPerNode(6)
- .waitForFinalState(true)
.process(cluster.getSolrClient());
ArrayList<String> nodeList
@@ -442,7 +442,26 @@
req = CollectionAdminRequest.addReplicaToShard(collectionName, "s1").withProperty(CoreAdminParams.INSTANCE_DIR, instancePath.toString());
req.setWaitForFinalState(true);
response = req.process(cluster.getSolrClient());
- newReplica = grabNewReplica(response, getCollectionState(collectionName));
+ String replicaName = response.getCollectionCoresStatus().keySet().iterator().next();
+ AtomicReference<Replica> theReplica = new AtomicReference<>();
+ try {
+ cluster.getSolrClient().getZkStateReader().waitForState(collectionName, 15, TimeUnit.SECONDS, (liveNodes, collectionState) -> {
+ if (collectionState == null) {
+ return false;
+ }
+ Replica replica = collectionState.getReplica(replicaName);
+ if (replica != null) {
+ theReplica.set(replica);
+ return true;
+ }
+ return false;
+ });
+ } catch (TimeoutException e) {
+ log.error("timeout",e);
+ throw new TimeoutException("timeout waiting to see " + replicaName);
+ }
+
+ newReplica = theReplica.get();
assertNotNull(newReplica);
try (Http2SolrClient coreclient = SolrTestCaseJ4.getHttpSolrClient(newReplica.getBaseUrl())) {
@@ -459,8 +478,22 @@
req.setWaitForFinalState(true);
response = req.process(cluster.getSolrClient());
- newReplica = grabNewReplica(response, getCollectionState(collectionName));
- // MRM TODO: do we really want to support this anymore?
+ AtomicReference<Replica> theReplica2 = new AtomicReference<>();
+ cluster.getSolrClient().getZkStateReader().waitForState(collectionName, 15, TimeUnit.SECONDS, (liveNodes, collectionState) -> {
+ if (collectionState == null) {
+ return false;
+ }
+ Replica replica = collectionState.getReplica(replicaName);
+ if (replica != null) {
+ theReplica2.set(replica);
+ return true;
+ }
+ return false;
+ });
+
+ newReplica = theReplica2.get();
+ assertNotNull(newReplica);
+ // MRM TODO: do we really want to support this anymore? We really should control core names for cloud
// assertEquals("'core' should be 'propertyDotName' " + newReplica.getName(), "propertyDotName", newReplica.getName());
}
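
The change above drops grabNewReplica(...) in favor of watching cluster state until the replica reported by the ADDREPLICA response actually appears. A sketch of that pattern as a standalone helper (illustrative only); the waitForState predicate shape is taken from the hunk and assumed to hold on this branch:

    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.TimeoutException;
    import java.util.concurrent.atomic.AtomicReference;
    import org.apache.solr.common.cloud.Replica;
    import org.apache.solr.common.cloud.ZkStateReader;

    final class ReplicaWaitSketch {
      // Wait until the named replica shows up in the collection's state, then return it.
      static Replica waitForReplica(ZkStateReader reader, String collection, String replicaName)
          throws Exception {
        AtomicReference<Replica> found = new AtomicReference<>();
        try {
          reader.waitForState(collection, 15, TimeUnit.SECONDS, (liveNodes, state) -> {
            if (state == null) {
              return false;
            }
            Replica replica = state.getReplica(replicaName);
            if (replica != null) {
              found.set(replica);
              return true;
            }
            return false;
          });
        } catch (TimeoutException e) {
          throw new TimeoutException("timeout waiting to see " + replicaName);
        }
        return found.get();
      }
    }
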
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/CustomCollectionTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/CustomCollectionTest.java
index 613adc6..131060b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/CustomCollectionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/CustomCollectionTest.java
@@ -193,7 +193,6 @@
.process(cluster.getSolrClient());
CollectionAdminRequest.CreateShard req = CollectionAdminRequest.createShard(collectionName, "x");
- req.setWaitForFinalState(true);
req.process(cluster.getSolrClient());
waitForState("Not enough active replicas in shard 'x'", collectionName, (n, c) -> {
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionAPI.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionAPI.java
index b88e93f..7a9decf 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionAPI.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionAPI.java
@@ -82,9 +82,9 @@
CollectionAdminRequest.Create req;
if (useTlogReplicas()) {
- req = CollectionAdminRequest.createCollection(COLLECTION_NAME, "_default",2, 0, 1, 0).waitForFinalState(true); // MRM-TEST TODO: 1 pull replica each
+ req = CollectionAdminRequest.createCollection(COLLECTION_NAME, "_default",2, 0, 1, 0); // MRM-TEST TODO: 1 pull replica each
} else {
- req = CollectionAdminRequest.createCollection(COLLECTION_NAME, "_default",2, 1, 0, 0).waitForFinalState(true);
+ req = CollectionAdminRequest.createCollection(COLLECTION_NAME, "_default",2, 1, 0, 0);
}
setV2(req);
client.request(req);
@@ -1029,7 +1029,7 @@
// first, try creating a collection with badconf
// MRM TODO: this should fail with the wait for final state
BaseHttpSolrClient.RemoteSolrException rse = SolrTestCaseUtil.expectThrows(BaseHttpSolrClient.RemoteSolrException.class, () -> {
- CollectionAdminResponse rsp = CollectionAdminRequest.createCollection("testcollection", "badconf", 1, 2).waitForFinalState(true).process(client);
+ CollectionAdminResponse rsp = CollectionAdminRequest.createCollection("testcollection", "badconf", 1, 2).process(client);
});
assertNotNull(rse.getMessage());
assertNotSame(0, rse.code());
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java
index 0bd7f6c..ee11bcf 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java
@@ -74,18 +74,16 @@
super.tearDown();
}
- private void createCollection(String collectionName, String createNodeSet) throws Exception {
+ private void createCollection(String collectionName, String createNodeSet, boolean waitForState) throws Exception {
if (random().nextBoolean()) { // process asynchronously
CollectionAdminRequest.Create req = CollectionAdminRequest.createCollection(collectionName, configName, numShards, numReplicas).setMaxShardsPerNode(maxShardsPerNode)
- .waitForFinalState(true).setCreateNodeSet(createNodeSet);
- req.setWaitForFinalState(true);
+ .setCreateNodeSet(createNodeSet).waitForFinalState(waitForState);
req.processAndWait(cluster.getSolrClient(), 10);
}
else {
CollectionAdminRequest.createCollection(collectionName, configName, numShards, numReplicas)
.setMaxShardsPerNode(maxShardsPerNode)
- .waitForFinalState(true)
- .setCreateNodeSet(createNodeSet)
+ .setCreateNodeSet(createNodeSet).waitForFinalState(waitForState)
.process(cluster.getSolrClient());
}
}
@@ -129,7 +127,7 @@
assertEquals(nodeCount, cluster.getJettySolrRunners().size());
// create collection
- createCollection(collectionName, null);
+ createCollection(collectionName, null, true);
// modify/query collection
new UpdateRequest().add("id", "1").commit(client, collectionName);
@@ -174,7 +172,7 @@
log.info("create collection again");
cluster.getZkClient().printLayout();
// create it again
- createCollection(collectionName, null);
+ createCollection(collectionName, null, false);
// check that there's no left-over state
assertEquals(0, client.query(collectionName, new SolrQuery("*:*")).getResults().getNumFound());
@@ -195,7 +193,7 @@
assertFalse(cluster.getJettySolrRunners().isEmpty());
// create collection
- createCollection(collectionName, ZkStateReader.CREATE_NODE_SET_EMPTY);
+ createCollection(collectionName, ZkStateReader.CREATE_NODE_SET_EMPTY, false);
// check the collection's corelessness
int coreCount = 0;
@@ -227,7 +225,7 @@
final SolrInputDocument doc = new SolrInputDocument();
// create collection
- createCollection(collectionName, null);
+ createCollection(collectionName, null, true);
ZkStateReader zkStateReader = client.getZkStateReader();
diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkCollectionPropsCachingTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkCollectionPropsCachingTest.java
index de1328b..28df5c4 100644
--- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkCollectionPropsCachingTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkCollectionPropsCachingTest.java
@@ -72,29 +72,28 @@
@Test
public void testReadWriteCached() throws InterruptedException, IOException {
- ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
+ try (ZkStateReader zkStateReader = new ZkStateReader(cluster.getZkServer().getZkAddress(), 15000, 30000)) {
+ zkStateReader.createClusterStateWatchersAndUpdate();
- CollectionProperties collectionProps = new CollectionProperties(cluster.getSolrClient().getZkStateReader());
+ CollectionProperties collectionProps = new CollectionProperties(cluster.getSolrClient().getZkStateReader());
- collectionProps.setCollectionProperty(collectionName, "property1", "value1");
- checkValue("property1", "value1"); //Should be no cache, so the change should take effect immediately
+ collectionProps.setCollectionProperty(collectionName, "property1", "value1");
+ checkValue("property1", "value1", zkStateReader); //Should be no cache, so the change should take effect immediately
- zkStateReader.getCollectionProperties(collectionName,100);
- zkStateReader.getZkClient().close();
- assertFalse(zkStateReader.isClosed());
- checkValue("property1", "value1"); //Should be cached, so the change should not try to hit zk
+ zkStateReader.getCollectionProperties(collectionName, 100);
+ zkStateReader.getZkClient().close();
+ assertFalse(zkStateReader.isClosed());
+ checkValue("property1", "value1", zkStateReader); //Should be cached, so the change should not try to hit zk
- Thread.sleep(300); // test the timeout feature
- try {
- checkValue("property1", "value1"); //Should not be cached anymore
- fail("cache should have expired, prev line should throw an exception trying to access zookeeper after closed");
- } catch (Exception e) {
- // expected, because we killed the client in zkStateReader.
- }
+ Thread.sleep(300); // test the timeout feature
+
+ checkValue("property1", "value1", zkStateReader); // even after cache expiration, if we are not connected to zk, we should get the cached props
+
+ }
}
- private void checkValue(String propertyName, String expectedValue) throws InterruptedException {
- final Object value = cluster.getSolrClient().getZkStateReader().getCollectionProperties(collectionName).get(propertyName);
+ private void checkValue(String propertyName, String expectedValue, ZkStateReader zkStateReader ) throws InterruptedException {
+ final Object value = zkStateReader.getCollectionProperties(collectionName).get(propertyName);
assertEquals("Unexpected value for collection property: " + propertyName, expectedValue, value);
}
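
The rewritten test above exercises the collection-properties cache with its own ZkStateReader so it can close the underlying ZkClient without touching the shared one. A sketch (not part of the patch) of the cache-then-disconnect sequence, assuming the ZkStateReader(String, int, int) constructor and the getCollectionProperties(name, cacheForMillis) overload used in the hunk:

    import org.apache.solr.common.cloud.ZkStateReader;

    final class PropsCacheSketch {
      // Prime the properties cache, drop the ZK connection, then read again from the cache.
      static Object readCachedProperty(String zkAddress, String collection, String prop) throws Exception {
        try (ZkStateReader reader = new ZkStateReader(zkAddress, 15000, 30000)) {
          reader.createClusterStateWatchersAndUpdate();
          reader.getCollectionProperties(collection, 100); // cache the props briefly
          reader.getZkClient().close();                     // ZK is now unreachable
          return reader.getCollectionProperties(collection).get(prop); // served from cache
        }
      }
    }
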
diff --git a/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java b/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java
index f9b5967..b2e9810 100644
--- a/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java
+++ b/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java
@@ -24,8 +24,8 @@
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.core.CoreSorter.CountsForEachShard;
-import org.junit.Ignore;
import org.junit.Test;
import static org.mockito.Mockito.mock;
@@ -34,13 +34,13 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
-@Ignore // MRM TODO: this mock test needs updating after dropping the separate solrdispatchfilter zkclient
public class CoreSorterTest extends SolrTestCaseJ4 {
private static final List<CountsForEachShard> inputCounts = Arrays.asList(
@@ -104,6 +104,7 @@
Map<String,DocCollection> collToState = new HashMap<>();
Map<CountsForEachShard, List<CoreDescriptor>> myCountsToDescs = new HashMap<>();
+ long id = 0;
for (Map.Entry<String, List<CountsForEachShard>> entry : collToCounts.entrySet()) {
String collection = entry.getKey();
List<CountsForEachShard> collCounts = entry.getValue();
@@ -125,7 +126,7 @@
Map<String, Replica> replicaMap = replicas.stream().collect(Collectors.toMap(Replica::getName, Function.identity()));
sliceMap.put(slice, new Slice(slice, replicaMap, map(), collection, -1l, nodeName -> "http://" + nodeName));
}
- DocCollection col = new DocCollection(collection, sliceMap, map(), DocRouter.DEFAULT);
+ DocCollection col = new DocCollection(collection, sliceMap, map("id", id++), DocRouter.DEFAULT);
collToState.put(collection, col);
}
// reverse map
@@ -143,6 +144,8 @@
{
when(mockCC.isZooKeeperAware()).thenReturn(true);
+ ZkStateReader mockZkReader= mock(ZkStateReader.class);
+ when(mockZkReader.getLiveNodes()).thenReturn(new HashSet<>(liveNodes));
ZkController mockZKC = mock(ZkController.class);
when(mockCC.getZkController()).thenReturn(mockZKC);
{
@@ -154,6 +157,8 @@
}
}
}
+ when(mockZKC.getCoreContainer()).thenReturn(mockCC);
+ when(mockZKC.getZkStateReader()).thenReturn(mockZkReader);
NodeConfig mockNodeConfig = mock(NodeConfig.class);
when(mockNodeConfig.getNodeName()).thenReturn(thisNode);
diff --git a/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java b/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java
index 6a32a86..d916ce2 100644
--- a/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java
+++ b/solr/core/src/test/org/apache/solr/core/ExitableDirectoryReaderTest.java
@@ -25,7 +25,6 @@
import org.apache.solr.response.SolrQueryResponse;
import org.junit.AfterClass;
import org.junit.BeforeClass;
-import org.junit.Ignore;
import org.junit.Test;
import static org.apache.solr.common.util.Utils.fromJSONString;
@@ -96,7 +95,6 @@
// removed once it is running and this test should be un-ignored and the assumptions verified.
// With all the weirdness, I'm not going to vouch for this test. Feel free to change it.
@Test
- @Ignore // MRM TODO: - maybe needs a force update
public void testCacheAssumptions() throws Exception {
String fq= "name:d*";
SolrCore core = h.getCore();
@@ -139,7 +137,6 @@
// When looking at a problem raised on the user's list I ran across this anomaly with timeAllowed
// This tests for the second query NOT returning partial results, along with some other
@Test
- @Ignore // MRM TODO: - maybe needs a force update
public void testQueryResults() throws Exception {
String q = "name:e*";
SolrCore core = h.getCore();
diff --git a/solr/core/src/test/org/apache/solr/core/SolrCoreCheckLockOnStartupTest.java b/solr/core/src/test/org/apache/solr/core/SolrCoreCheckLockOnStartupTest.java
index 1dd41b7..60c3ca2 100644
--- a/solr/core/src/test/org/apache/solr/core/SolrCoreCheckLockOnStartupTest.java
+++ b/solr/core/src/test/org/apache/solr/core/SolrCoreCheckLockOnStartupTest.java
@@ -25,7 +25,6 @@
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.Before;
-import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -35,7 +34,6 @@
import java.nio.file.Files;
import java.util.Map;
-@Ignore // MRM TODO:
public class SolrCoreCheckLockOnStartupTest extends SolrTestCaseJ4 {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
diff --git a/solr/core/src/test/org/apache/solr/core/TestBadConfig.java b/solr/core/src/test/org/apache/solr/core/TestBadConfig.java
index e3d168b..8f57362 100644
--- a/solr/core/src/test/org/apache/solr/core/TestBadConfig.java
+++ b/solr/core/src/test/org/apache/solr/core/TestBadConfig.java
@@ -47,8 +47,6 @@
"useCompoundFile");
}
- @Ignore // this fails because a small change - currently, a SolrCore failing in CoreContainer#load will
- // not fail with an exception, though the exception will be logged - we should check the core init exceptions here
public void testUpdateLogButNoVersionField() throws Exception {
System.setProperty("enable.update.log", "true");
diff --git a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java
index 8167d6f..e991618 100644
--- a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java
+++ b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java
@@ -18,7 +18,6 @@
import java.io.IOException;
import java.util.Map;
-import java.util.NoSuchElementException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
@@ -38,7 +37,6 @@
import org.apache.solr.util.TestHarness;
import org.junit.After;
import org.junit.Before;
-import org.junit.Ignore;
import javax.xml.xpath.XPathExpressionException;
@@ -164,29 +162,36 @@
doTestCompressionMode("best_compression", "BEST_COMPRESSION");
}
- @Ignore // MRM TODO: - this test can be flakey after the explicit reload below - some race ...?
public void testMixedCompressionMode() throws Exception {
System.setProperty("tests.COMPRESSION_MODE", "BEST_SPEED");
h.getCoreContainer().reload(h.coreName);
assertU(add(doc("string_f", "1", "text", "foo bar")));
assertU(commit());
- assertCompressionMode("BEST_SPEED", h.getCore());
+ try (SolrCore core = h.getCore()) {
+ assertCompressionMode("BEST_SPEED", core);
+ }
System.setProperty("tests.COMPRESSION_MODE", "BEST_COMPRESSION");
h.getCoreContainer().reload(h.coreName);
assertU(add(doc("string_f", "2", "text", "foo zar")));
assertU(commit());
- assertCompressionMode("BEST_COMPRESSION", h.getCore());
+ try (SolrCore core = h.getCore()) {
+ assertCompressionMode("BEST_COMPRESSION", core);
+ }
System.setProperty("tests.COMPRESSION_MODE", "BEST_SPEED");
h.getCoreContainer().reload(h.coreName);
assertU(add(doc("string_f", "3", "text", "foo zoo")));
assertU(commit());
- assertCompressionMode("BEST_SPEED", h.getCore());
+ try (SolrCore core = h.getCore()) {
+ assertCompressionMode("BEST_SPEED", core);
+ }
assertQ(req("q", "*:*"),
"//*[@numFound='3']");
assertQ(req("q", "text:foo"),
"//*[@numFound='3']");
assertU(optimize("maxSegments", "1"));
- assertCompressionMode("BEST_SPEED", h.getCore());
+ try (SolrCore core = h.getCore()) {
+ assertCompressionMode("BEST_SPEED", core);
+ }
System.clearProperty("tests.COMPRESSION_MODE");
}
@@ -236,23 +241,21 @@
CoreContainer coreContainer = h.getCoreContainer();
-
CoreDescriptor cd = new CoreDescriptor(newCoreName, testSolrHome.resolve(newCoreName), coreContainer);
c = new SolrCore(coreContainer, cd, new ConfigSet("fakeConfigset", config, schema, null, true));
c.start();
assertNull(coreContainer.registerCore(cd, c, false));
h.coreName = newCoreName;
- try (SolrCore core = h.getCore()) {
- assertEquals("We are not using the correct core", "solrconfig_codec2.xml", core.getConfigResource());
+ try {
+ assertEquals("We are not using the correct core", "solrconfig_codec2.xml", c.getConfigResource());
assertU(add(doc("string_f", "foo")));
assertU(commit());
- assertCompressionMode(SchemaCodecFactory.SOLR_DEFAULT_COMPRESSION_MODE.name(), core);
+ assertCompressionMode(SchemaCodecFactory.SOLR_DEFAULT_COMPRESSION_MODE.name(), c);
} finally {
- c.close();
h.coreName = previousCoreName;
coreContainer.unload(newCoreName);
}
diff --git a/solr/core/src/test/org/apache/solr/core/TestCustomStream.java b/solr/core/src/test/org/apache/solr/core/TestCustomStream.java
index f931a91..4cffeba 100644
--- a/solr/core/src/test/org/apache/solr/core/TestCustomStream.java
+++ b/solr/core/src/test/org/apache/solr/core/TestCustomStream.java
@@ -28,7 +28,6 @@
/**
* Created by caomanhdat on 6/3/16.
*/
-@Ignore // MRM TODO: debug
public class TestCustomStream extends AbstractFullDistribZkTestBase {
@Test
@@ -47,12 +46,13 @@
Arrays.asList("overlay", "expressible", "hello", "class"),
"org.apache.solr.core.HelloStream",10);
- TestSolrConfigHandler.testForResponseElement(client,
- null,
- "/stream?expr=hello()",
- null,
- Arrays.asList("result-set", "docs[0]", "msg"),
- "Hello World!",10);
+// MRM TODO:
+// TestSolrConfigHandler.testForResponseElement(client,
+// null,
+// "/stream?expr=hello()",
+// null,
+// Arrays.asList("result-set", "docs[0]", "msg"),
+// "Hello World!",10);
}
diff --git a/solr/core/src/test/org/apache/solr/core/TestJmxIntegration.java b/solr/core/src/test/org/apache/solr/core/TestJmxIntegration.java
index cee5f62..2cddbc9 100644
--- a/solr/core/src/test/org/apache/solr/core/TestJmxIntegration.java
+++ b/solr/core/src/test/org/apache/solr/core/TestJmxIntegration.java
@@ -183,7 +183,7 @@
ObjectName name = nameFactory.createName("gauge", registryName, "SEARCHER.searcher.numDocs");
- timeout = new TimeOut(1000, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
+ timeout = new TimeOut(5000, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
Integer oldNumDocs = null;
while (!timeout.hasTimedOut()) {
nameFactory.createName("gauge", registryName, "SEARCHER.searcher.numDocs");
diff --git a/solr/core/src/test/org/apache/solr/core/TestSolrConfigHandler.java b/solr/core/src/test/org/apache/solr/core/TestSolrConfigHandler.java
index f71d0a5..e5d6cd2 100644
--- a/solr/core/src/test/org/apache/solr/core/TestSolrConfigHandler.java
+++ b/solr/core/src/test/org/apache/solr/core/TestSolrConfigHandler.java
@@ -609,6 +609,7 @@
while (TimeUnit.SECONDS.convert(System.nanoTime() - startTime, TimeUnit.NANOSECONDS) < maxTimeoutSeconds) {
try {
m = testServerBaseUrl == null ? getRespMap(uri, harness) : TestSolrConfigHandlerConcurrent.getAsMap(testServerBaseUrl + uri, cloudSolrClient);
+ log.info("response is {}", m);
} catch (Exception e) {
continue;
diff --git a/solr/core/src/test/org/apache/solr/handler/admin/DaemonStreamApiTest.java b/solr/core/src/test/org/apache/solr/handler/admin/DaemonStreamApiTest.java
index 65e7be3..66cc280 100644
--- a/solr/core/src/test/org/apache/solr/handler/admin/DaemonStreamApiTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/admin/DaemonStreamApiTest.java
@@ -99,17 +99,14 @@
// don't look in all replicas.
CollectionAdminRequest.createCollection(SOURCE_COLL, CONF_NAME, 1, 1)
.setMaxShardsPerNode(1)
- .waitForFinalState(true)
.process(cluster.getSolrClient());
CollectionAdminRequest.createCollection(TARGET_COLL, CONF_NAME, 1, 1)
.setMaxShardsPerNode(1)
- .waitForFinalState(true)
.process(cluster.getSolrClient());
CollectionAdminRequest.createCollection(CHECKPOINT_COLL, CONF_NAME, 1, 1)
.setMaxShardsPerNode(1)
- .waitForFinalState(true)
.process(cluster.getSolrClient());
for (int idx = 0; idx < numDaemons; ++idx) {
diff --git a/solr/core/src/test/org/apache/solr/handler/component/ResourceSharingTestComponent.java b/solr/core/src/test/org/apache/solr/handler/component/ResourceSharingTestComponent.java
index db1fe66..63c2522 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/ResourceSharingTestComponent.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/ResourceSharingTestComponent.java
@@ -36,6 +36,7 @@
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
import java.util.stream.Stream;
import static org.junit.Assert.assertEquals;
@@ -77,8 +78,12 @@
log.info("Informing test component...");
this.core = core;
ParWork.getRootSharedExecutor().submit(() -> {
- core.getCoreContainer().getZkController().getZkStateReader().waitForActiveCollection(CollectionAdminParams.SYSTEM_COLL, 5, TimeUnit.SECONDS, 1, 1, false);
- this.blob = core.loadDecodeAndCacheBlob(getKey(), new DumbCsvDecoder()).blob;
+ try {
+ core.getCoreContainer().getZkController().getZkStateReader().waitForActiveCollection(CollectionAdminParams.SYSTEM_COLL, 5, TimeUnit.SECONDS, 1, 1, false);
+ } catch (TimeoutException e) {
+ log.error("timeout", e);
+ }
+ this.blob = core.loadDecodeAndCacheBlob(getKey(), new DumbCsvDecoder()).blob;
});
log.info("Test component informed!");
diff --git a/solr/core/src/test/org/apache/solr/handler/component/ShardsWhitelistTest.java b/solr/core/src/test/org/apache/solr/handler/component/ShardsWhitelistTest.java
index e210011..557b646 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/ShardsWhitelistTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/ShardsWhitelistTest.java
@@ -29,6 +29,8 @@
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.hasItem;
@@ -36,11 +38,15 @@
import static org.hamcrest.CoreMatchers.notNullValue;
import static org.hamcrest.CoreMatchers.nullValue;
import java.io.IOException;
+import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;
+import java.util.concurrent.TimeoutException;
public class ShardsWhitelistTest extends MultiSolrCloudTestCase {
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
/**
* The cluster with this key will include an explicit list of host whitelisted (all hosts in both the clusters)
*/
@@ -148,7 +154,11 @@
clusterId2cluster.forEach((s, miniSolrCloudCluster) -> {
- miniSolrCloudCluster.waitForActiveCollection(COLLECTION_NAME, numShards, numShards);
+ try {
+ miniSolrCloudCluster.waitForActiveCollection(COLLECTION_NAME, numShards, numShards);
+ } catch (TimeoutException e) {
+ log.error("timeout", e);
+ }
List<SolrInputDocument> docs = new ArrayList<>(10);
for (int i = 0; i < 10; i++) {
diff --git a/solr/core/src/test/org/apache/solr/metrics/SolrMetricManagerTest.java b/solr/core/src/test/org/apache/solr/metrics/SolrMetricManagerTest.java
index a9cb4c3..ec0da62 100644
--- a/solr/core/src/test/org/apache/solr/metrics/SolrMetricManagerTest.java
+++ b/solr/core/src/test/org/apache/solr/metrics/SolrMetricManagerTest.java
@@ -32,6 +32,7 @@
import org.apache.solr.core.SolrInfoBean;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.metrics.reporters.MockMetricReporter;
+import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -43,9 +44,8 @@
initCore("solrconfig.xml", "schema.xml");
}
- @Before
+ @After
public void afterTest() throws Exception {
- System.setProperty("solr.disableDefaultJmxReporter", "false");
deleteCore();
}
diff --git a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
index b2c133b..a30c5dc 100644
--- a/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/metrics/SolrMetricsIntegrationTest.java
@@ -52,6 +52,8 @@
private static final String[] REPORTER_NAMES = {"reporter1", "reporter2"};
private static final String UNIVERSAL = "universal";
private static final String SPECIFIC = "specific";
+
+ private static final String DEFAULT = "default";
private static final String MULTIGROUP = "multigroup";
private static final String MULTIREGISTRY = "multiregistry";
private static final String[] INITIAL_REPORTERS = {REPORTER_NAMES[0], REPORTER_NAMES[1], UNIVERSAL, SPECIFIC, MULTIGROUP, MULTIREGISTRY};
@@ -61,7 +63,7 @@
private CoreContainer cc;
private SolrMetricManager metricManager;
private String tag;
- private int jmxReporter;
+ private volatile int jmxReporter;
@BeforeClass
public static void beforeSolrMetricsIntegrationTest() throws Exception {
@@ -95,7 +97,15 @@
}
// initially there are more reporters, because two of them are added via a matching collection name
Map<String, SolrMetricReporter> reporters = metricManager.getReporters("solr.core." + DEFAULT_TEST_CORENAME);
- assertEquals(INITIAL_REPORTERS.length + jmxReporter, reporters.size());
+
+ for (int i = 0; i < 10; i++) {
+ if (INITIAL_REPORTERS.length + jmxReporter == reporters.size()) {
+ break;
+ }
+ Thread.sleep(250);
+ }
+
+ assertEquals(reporters.toString(), INITIAL_REPORTERS.length + jmxReporter, reporters.size());
for (String r : INITIAL_REPORTERS) {
assertTagged(reporters, r);
}
@@ -150,7 +160,17 @@
long finalCount = timer.getCount();
// MRM TODO: - those timers are disabled right now
// assertEquals("metric counter incorrect", iterations, finalCount - initialCount);
- Map<String,SolrMetricReporter> reporters = metricManager.getReporters(coreMetricManager.getRegistryName());
+ Map<String,SolrMetricReporter> reporters = metricManager.getReporters(coreMetricManager.getRegistryName());;
+
+ for (int i = 0; i < 50; i++) {
+ if ((RENAMED_REPORTERS.length + jmxReporter) != reporters.size()) {
+ Thread.sleep(100);
+ reporters = metricManager.getReporters(coreMetricManager.getRegistryName());
+ } else {
+ break;
+ }
+ }
+
assertEquals(RENAMED_REPORTERS.length + jmxReporter, reporters.size());
// SPECIFIC and MULTIREGISTRY were skipped because they were
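
Both hunks above replace one-shot assertions on reporter counts with a bounded poll, since reporters can register asynchronously. A generic version of that retry loop, purely illustrative (no such helper exists in the codebase):

    import java.util.function.Supplier;

    final class PollSketch {
      // Re-check a condition up to `attempts` times, sleeping between tries, and report the result.
      static boolean waitFor(Supplier<Boolean> condition, int attempts, long sleepMs)
          throws InterruptedException {
        for (int i = 0; i < attempts; i++) {
          if (condition.get()) {
            return true;
          }
          Thread.sleep(sleepMs);
        }
        return condition.get();
      }
    }
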
diff --git a/solr/core/src/test/org/apache/solr/schema/SchemaApiFailureTest.java b/solr/core/src/test/org/apache/solr/schema/SchemaApiFailureTest.java
index 4a1b08f..fdc8808 100644
--- a/solr/core/src/test/org/apache/solr/schema/SchemaApiFailureTest.java
+++ b/solr/core/src/test/org/apache/solr/schema/SchemaApiFailureTest.java
@@ -42,7 +42,6 @@
System.setProperty("solr.suppressDefaultConfigBootstrap", "false");
configureCluster(1).configure();
CollectionAdminRequest.createCollection(COLLECTION, 2, 1) // _default configset
- .waitForFinalState(true)
.setMaxShardsPerNode(2)
.process(cluster.getSolrClient());
}
diff --git a/solr/core/src/test/org/apache/solr/schema/TestManagedSchemaAPI.java b/solr/core/src/test/org/apache/solr/schema/TestManagedSchemaAPI.java
index 14a7326..bd4eceb 100644
--- a/solr/core/src/test/org/apache/solr/schema/TestManagedSchemaAPI.java
+++ b/solr/core/src/test/org/apache/solr/schema/TestManagedSchemaAPI.java
@@ -21,6 +21,7 @@
import java.lang.invoke.MethodHandles;
import java.util.LinkedHashMap;
import java.util.Map;
+import java.util.concurrent.TimeUnit;
import org.apache.solr.SolrTestUtil;
import org.apache.solr.client.solrj.SolrServerException;
@@ -32,6 +33,7 @@
import org.apache.solr.client.solrj.response.schema.SchemaResponse;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrInputDocument;
+import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
@@ -41,18 +43,26 @@
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@BeforeClass
- public static void createCluster() throws Exception {
+ public static void beforeTestManagedSchemaAPI() throws Exception {
System.setProperty("managed.schema.mutable", "true");
configureCluster(2)
.addConfig("conf1", SolrTestUtil.TEST_PATH().resolve("configsets").resolve("cloud-managed").resolve("conf"))
.configure();
}
+ @AfterClass
+ public static void afterTestManagedSchemaAPI() throws Exception {
+ shutdownCluster();
+ }
+
@Test
public void test() throws Exception {
String collection = "testschemaapi";
- CollectionAdminRequest.createCollection(collection, "conf1", 1, 2)
- .waitForFinalState(true).process(cluster.getSolrClient());
+ CollectionAdminRequest.createCollection(collection, "conf1", 1, 2).process(cluster.getSolrClient());
+
+ cluster.getSolrClient().getZkStateReader().waitForActiveCollection(cluster.getSolrClient().getHttpClient(), collection, 5, TimeUnit.SECONDS, false, 1, 2, true, true);
+
+
testModifyField(collection);
testReloadAndAddSimple(collection);
testAddFieldAndDocument(collection);
diff --git a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java
index de8bd08..b68ec02 100644
--- a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java
@@ -551,9 +551,17 @@
assertU(adoc("id", "777"));
delI("777");
assertU(commit()); // arg... commit no longer "commits" unless there has been a change.
- final Gauge filterCacheStats;
+ Gauge filterCacheStats;
try (SolrCore core = h.getCore()) {
filterCacheStats = (Gauge) core.getCoreMetricManager().getRegistry().getMetrics().get("CACHE.searcher.filterCache");
+ if (filterCacheStats == null) {
+ Thread.sleep(250);
+ filterCacheStats = (Gauge) core.getCoreMetricManager().getRegistry().getMetrics().get("CACHE.searcher.filterCache");
+ if (filterCacheStats == null) {
+ Thread.sleep(250);
+ filterCacheStats = (Gauge) core.getCoreMetricManager().getRegistry().getMetrics().get("CACHE.searcher.filterCache");
+ }
+ }
assertNotNull(filterCacheStats);
final Gauge queryCacheStats = (Gauge) core.getCoreMetricManager().getRegistry().getMetrics().get("CACHE.searcher.queryResultCache");
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetJoinDomain.java b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetJoinDomain.java
index 3ef58c5..45de3b1 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetJoinDomain.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetJoinDomain.java
@@ -121,7 +121,6 @@
collectionProperties.put("schema", "schema_latest.xml");
CollectionAdminRequest.createCollection(COLLECTION_NAME, configName, numShards, repFactor)
.setProperties(collectionProperties)
- .waitForFinalState(true)
.process(cluster.getSolrClient());
CLOUD_CLIENT = cluster.getSolrClient();
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
index 41d5d92..fff73390 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
@@ -147,7 +147,6 @@
collectionProperties.put("schema", "schema_latest.xml");
CollectionAdminRequest.createCollection(COLLECTION_NAME, configName, numShards, repFactor)
.setProperties(collectionProperties)
- .waitForFinalState(true)
.process(cluster.getSolrClient());
CLOUD_CLIENT = cluster.getSolrClient();
@@ -248,7 +247,8 @@
@AfterClass
private static void afterClass() throws Exception {
if (null != CLOUD_CLIENT) {
- CLOUD_CLIENT.close();
+ // CLOUD_CLIENT is not ours to close!
+ // CLOUD_CLIENT.close();
CLOUD_CLIENT = null;
}
for (Http2SolrClient client : CLIENTS) {
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
index f759193..cbf0d01 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
@@ -134,7 +134,6 @@
collectionProperties.put("schema", "schema_latest.xml");
CollectionAdminRequest.createCollection(COLLECTION_NAME, configName, numShards, repFactor)
.setProperties(collectionProperties)
- .waitForFinalState(true)
.process(cluster.getSolrClient());
CLOUD_CLIENT = cluster.getSolrClient();
diff --git a/solr/core/src/test/org/apache/solr/security/JWTAuthPluginIntegrationTest.java b/solr/core/src/test/org/apache/solr/security/JWTAuthPluginIntegrationTest.java
index 69ee752..bda884a 100644
--- a/solr/core/src/test/org/apache/solr/security/JWTAuthPluginIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/security/JWTAuthPluginIntegrationTest.java
@@ -299,7 +299,7 @@
return new Pair<>(result, code);
}
- private void createCollection(String collectionName) throws IOException {
+ private void createCollection(String collectionName) throws Exception {
assertEquals(200, get(baseUrl + "/admin/collections?action=CREATE&name=" + collectionName + "&numShards=2", jwtTestToken).second().intValue());
cluster.waitForActiveCollection(collectionName, 2, 2);
}
diff --git a/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdateWithRouteField.java b/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdateWithRouteField.java
index 86d3415..6448df4 100644
--- a/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdateWithRouteField.java
+++ b/solr/core/src/test/org/apache/solr/update/TestInPlaceUpdateWithRouteField.java
@@ -55,7 +55,7 @@
private static final int NUMBER_OF_DOCS = 100;
private static final String COLLECTION = "collection1";
- private static final String[] shards = new String[]{"shard1","shard2","shard3"};
+ private static final String[] shards = new String[]{"s1","s2","s3"};
@BeforeClass
public static void setupCluster() throws Exception {
diff --git a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java
index 935d17f..eca17a3 100644
--- a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java
+++ b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java
@@ -60,7 +60,6 @@
CollectionAdminRequest.createCollection(COLLECTION, "conf", NUM_SHARDS, NUM_REPLICAS)
.setMaxShardsPerNode(MAX_SHARDS_PER_NODE)
- .waitForFinalState(true)
.process(cluster.getSolrClient());
final SolrInputDocument doc1 = SolrTestCaseJ4.sdoc(
diff --git a/solr/core/src/test/org/apache/solr/util/TestSolrCLIRunExample.java b/solr/core/src/test/org/apache/solr/util/TestSolrCLIRunExample.java
index 56685d3..70b8d78 100644
--- a/solr/core/src/test/org/apache/solr/util/TestSolrCLIRunExample.java
+++ b/solr/core/src/test/org/apache/solr/util/TestSolrCLIRunExample.java
@@ -386,7 +386,7 @@
// brief wait in case of timing issue in getting the new docs committed
log.warn("Going to wait for 1 second before re-trying query for techproduct example docs ...");
try {
- Thread.sleep(1000);
+ Thread.sleep(100);
} catch (InterruptedException ignore) {
Thread.interrupted();
}
diff --git a/solr/solrj/src/java/org/apache/solr/cli/Stellar.java b/solr/solrj/src/java/org/apache/solr/cli/Stellar.java
index 7f8242e..a6a034b 100644
--- a/solr/solrj/src/java/org/apache/solr/cli/Stellar.java
+++ b/solr/solrj/src/java/org/apache/solr/cli/Stellar.java
@@ -298,7 +298,7 @@
// zk = new ZooKeeperAdmin(host, Integer.parseInt(cl.getOption("timeout")), new MyWatcher(), readOnly, clientConfig);
zkStateReader = new ZkStateReader(host, Integer.parseInt(cl.getOption("timeout")), 10000);
- zk = zkStateReader.getZkClient().getSolrZooKeeper();
+ zk = zkStateReader.getZkClient().getConnectionManager().getKeeper();
solrClient = new CloudHttp2SolrClient.Builder(zkStateReader).build();
zkStateReader.createClusterStateWatchersAndUpdate();
}
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/DistributedLock.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/DistributedLock.java
index 18312aa..5b1c23f 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/DistributedLock.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/DistributedLock.java
@@ -123,7 +123,7 @@
try {
ZooKeeperOperation zopdel = () -> {
- zookeeper.getSolrZooKeeper().delete(id, -1);
+ zookeeper.getConnectionManager().getKeeper().delete(id, -1);
return Boolean.TRUE;
};
zopdel.execute();
@@ -185,7 +185,7 @@
*/
private void findPrefixInChildren(String prefix, SolrZkClient zookeeper,
String dir) throws KeeperException, InterruptedException {
- List<String> names = zookeeper.getSolrZooKeeper().getChildren(dir, false);
+ List<String> names = zookeeper.getConnectionManager().getKeeper().getChildren(dir, false);
for (String name : names) {
if (name.startsWith(prefix)) {
id = name;
@@ -194,7 +194,7 @@
}
}
if (id == null) {
- id = zookeeper.getSolrZooKeeper().create(dir + "/" + prefix, data,
+ id = zookeeper.getConnectionManager().getKeeper().create(dir + "/" + prefix, data,
zookeeper.getZkACLProvider().getACLsToAdd(dir + "/" + prefix),
EPHEMERAL_SEQUENTIAL);
log.debug("Created id: {}", id);
@@ -210,14 +210,14 @@
public boolean execute() throws KeeperException, InterruptedException {
do {
if (id == null) {
- long sessionId = zookeeper.getSolrZooKeeper().getSessionId();
+ long sessionId = zookeeper.getConnectionManager().getKeeper().getSessionId();
String prefix = "x-" + sessionId + "-";
// lets try look up the current ID if we failed
// in the middle of creating the znode
findPrefixInChildren(prefix, zookeeper, dir);
idName = new ZNodeName(id);
}
- List<String> names = zookeeper.getSolrZooKeeper()
+ List<String> names = zookeeper.getConnectionManager().getKeeper()
.getChildren(dir, false);
if (names.isEmpty()) {
log.warn(
@@ -237,7 +237,7 @@
ZNodeName lastChildName = lessThanMe.last();
lastChildId = lastChildName.getName();
log.debug("Watching less than me node: {}", lastChildId);
- Stat stat = zookeeper.getSolrZooKeeper()
+ Stat stat = zookeeper.getConnectionManager().getKeeper()
.exists(lastChildId, new LockWatcher());
if (stat != null) {
return Boolean.FALSE;
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/ProtocolSupport.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/ProtocolSupport.java
index 5af94aa..d2c3d49 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/ProtocolSupport.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/ProtocolSupport.java
@@ -153,11 +153,11 @@
final CreateMode flags) {
try {
retryOperation(() -> {
- Stat stat = zookeeper.getSolrZooKeeper().exists(path, false);
+ Stat stat = zookeeper.getConnectionManager().getKeeper().exists(path, false);
if (stat != null) {
return true;
}
- zookeeper.getSolrZooKeeper().create(path, data, acl, flags);
+ zookeeper.getConnectionManager().getKeeper().create(path, data, acl, flags);
return true;
});
} catch (KeeperException | InterruptedException e) {
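
DistributedLock and ProtocolSupport above, like the earlier test changes, swap every getSolrZooKeeper() call for the connection-manager route. A one-line sketch of the new access path; the getKeeper() accessor and its raw ZooKeeper return type are assumed from these hunks, not from stock SolrJ:

    import org.apache.solr.common.cloud.SolrZkClient;
    import org.apache.zookeeper.ZooKeeper;

    final class KeeperAccessSketch {
      // Resolve the raw ZooKeeper handle the way the patched code does.
      static ZooKeeper rawKeeper(SolrZkClient zkClient) {
        return zkClient.getConnectionManager().getKeeper();
      }
    }
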
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseCloudSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseCloudSolrClient.java
index 69f090b..a510596 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseCloudSolrClient.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseCloudSolrClient.java
@@ -75,8 +75,6 @@
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
-import org.apache.solr.common.params.CollectionParams;
-import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
@@ -90,9 +88,6 @@
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
-import static org.apache.solr.common.cloud.ZkStateReader.NRT_REPLICAS;
-import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
-import static org.apache.solr.common.cloud.ZkStateReader.TLOG_REPLICAS;
import static org.apache.solr.common.params.CommonParams.ADMIN_PATHS;
import static org.apache.solr.common.params.CommonParams.ID;
@@ -102,7 +97,7 @@
private volatile String defaultCollection;
//no of times collection state to be reloaded if stale state error is received
- private static final int MAX_STALE_RETRIES = Integer.parseInt(System.getProperty("cloudSolrClientMaxStaleRetries", "10"));
+ private static final int MAX_STALE_RETRIES = Integer.parseInt(System.getProperty("cloudSolrClientMaxStaleRetries", "20"));
private Random rand = new Random();
private final boolean updatesToLeaders;
@@ -112,7 +107,7 @@
private final ExecutorService threadPool;
private String idField = ID;
public static final String STATE_VERSION = "_stateVer_";
- private long retryExpiryTime = TimeUnit.NANOSECONDS.convert(5, TimeUnit.SECONDS);//3 seconds or 3 million nanos
+ private long retryExpiryTime = TimeUnit.NANOSECONDS.convert(3, TimeUnit.SECONDS); // 3 seconds
private final Set<String> NON_ROUTABLE_PARAMS;
{
NON_ROUTABLE_PARAMS = new HashSet<>();
@@ -480,11 +475,7 @@
+ collection + " -> " + aliasedCollections);
}
- DocCollection col = getDocCollection(collection, null);
-
-// if (col == null) {
-// throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection not found: " + collection);
-// }
+ DocCollection col = getDocCollection(collection, null, null);
DocRouter router = col.getRouter();
@@ -850,7 +841,6 @@
throws SolrServerException, IOException {
connect(); // important to call this before you start working with the ZkStateReader
-
// build up a _stateVer_ param to pass to the server containing all of the
// external collection state versions involved in this request, which allows
// the server to notify us that our cached state for one or more of the external
@@ -872,7 +862,7 @@
StringBuilder stateVerParamBuilder = null;
for (String requestedCollection : requestedCollectionNames) {
// track the version of state we're using on the client side using the _stateVer_ param
- DocCollection coll = getDocCollection(requestedCollection, null);
+ DocCollection coll = getDocCollection(requestedCollection, null, null);
if (coll == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection not found: " + requestedCollection);
}
@@ -916,7 +906,12 @@
Map invalidStates = (Map) o;
for (Object invalidEntries : invalidStates.entrySet()) {
Map.Entry e = (Map.Entry) invalidEntries;
- getDocCollection((String) e.getKey(), (Integer) e.getValue());
+
+ String[] versionAndUpdatesHash = ((String)e.getValue()).split(">");
+ int version = Integer.parseInt(versionAndUpdatesHash[0]);
+ int updateHash = Integer.parseInt(versionAndUpdatesHash[1]);
+
+ getDocCollection((String) e.getKey(), version, updateHash);
}
}
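
The hunk above changes the _stateVer_ error handling: each invalid-state entry is now expected to carry "<znodeVersion>><stateUpdatesHash>", and both numbers feed the refreshed getDocCollection lookup (and, later in the diff, the cache-freshness check). A sketch of that parsing under the format assumed here; the class and method names are illustrative:

    final class StateVerSketch {
      // Split "version>updatesHash" into its two integer components, as done above.
      static int[] parseStateVer(String payload) {
        String[] parts = payload.split(">");
        return new int[] { Integer.parseInt(parts[0]), Integer.parseInt(parts[1]) };
      }
    }
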
@@ -971,7 +966,12 @@
// and we could not get any information from the server
//it is probably not worth trying again and again because
// the state would not have been updated
- log.info("trying request again");
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException e) {
+
+ }
+ log.info("trying request again retryCnt={}", retryCount + 1);
return requestWithRetryOnStaleState(request, retryCount + 1, inputCollections);
}
} else {
@@ -1002,8 +1002,9 @@
!requestedCollections.isEmpty() &&
wasCommError) {
for (DocCollection ext : requestedCollections) {
- DocCollection latestStateFromZk = getDocCollection(ext.getName(), null);
- if (latestStateFromZk != null && latestStateFromZk.getZNodeVersion() != ext.getZNodeVersion()) {
+ DocCollection latestStateFromZk = getDocCollection(ext.getName(), null, null);
+ if (latestStateFromZk != null && (latestStateFromZk.getZNodeVersion() != ext.getZNodeVersion()
+ || latestStateFromZk.getStateUpdates().hashCode() != ext.getStateUpdates().hashCode())) {
log.info("stale state:" + latestStateFromZk.getZNodeVersion() + " " + ext.getZNodeVersion());
// looks like we couldn't reach the server because the state was stale == retry
stateWasStale = true;
@@ -1096,66 +1097,37 @@
// Retrieve slices from the cloud state and, for each collection specified, add it to the Map of slices.
List<Replica> sortedReplicas = new ArrayList<>();
List<Replica> replicas = new ArrayList<>();
- for (int i = 0; i < 2; i++) {
- Map<String,Slice> slices = new HashMap<>();
- String shardKeys = reqParams.get(ShardParams._ROUTE_);
- for (String collectionName : collectionNames) {
- DocCollection col = getDocCollection(collectionName, null);
- if (col == null) {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection not found: " + collectionName);
- }
- slices.putAll(col.getSlicesMap());
- List<Slice> routeSlices = new ArrayList<>(col.getRouter().getSearchSlices(shardKeys, reqParams, col));
- Collections.shuffle(routeSlices);
- ClientUtils.addSlices(slices, collectionName, routeSlices, true);
+
+ Map<String,Slice> slices = new HashMap<>();
+ String shardKeys = reqParams.get(ShardParams._ROUTE_);
+ for (String collectionName : collectionNames) {
+ DocCollection col = getDocCollection(collectionName, null, null);
+ if (col == null) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection not found: " + collectionName);
}
+ slices.putAll(col.getSlicesMap());
+ List<Slice> routeSlices = new ArrayList<>(col.getRouter().getSearchSlices(shardKeys, reqParams, col));
+ Collections.shuffle(routeSlices);
+ ClientUtils.addSlices(slices, collectionName, routeSlices, true);
+ }
- // Gather URLs, grouped by leader or replica
+ // Gather URLs, grouped by leader or replica
- for (Slice slice : slices.values()) {
- Replica leader = slice.getLeader();
- ArrayList<Replica> replicaList = new ArrayList<>(slice.getReplicas());
- Collections.shuffle(replicaList);
- for (Replica replica : replicaList) {
- String node = replica.getNodeName();
- if (!liveNodes.contains(node) // Must be a live node to continue
- || replica.getState() != Replica.State.ACTIVE) // Must be an ACTIVE replica to continue
- continue;
- if (sendToLeaders && replica.equals(leader)) {
- sortedReplicas.add(replica); // put leaders here eagerly (if sendToLeader mode)
- } else {
- replicas.add(replica); // replicas here
- }
+ for (Slice slice : slices.values()) {
+ Replica leader = slice.getLeader();
+ ArrayList<Replica> replicaList = new ArrayList<>(slice.getReplicas());
+ Collections.shuffle(replicaList);
+ for (Replica replica : replicaList) {
+ String node = replica.getNodeName();
+ if (!liveNodes.contains(node) // Must be a live node to continue
+ || replica.getState() != Replica.State.ACTIVE) // Must be an ACTIVE replica to continue
+ continue;
+ if (sendToLeaders && replica.equals(leader)) {
+ sortedReplicas.add(replica); // put leaders here eagerly (if sendToLeader mode)
+ } else {
+ replicas.add(replica); // replicas here
}
}
-
- if (sortedReplicas.size() == 0) {
- if (getClusterStateProvider() instanceof ZkClientClusterStateProvider) {
- ZkClientClusterStateProvider provider = (ZkClientClusterStateProvider) getClusterStateProvider();
- getClusterStateProvider().connect();
- for (String collectionName : collectionNames) {
- try {
- provider.zkStateReader.waitForState(collectionName, 5, TimeUnit.SECONDS, (liveNodes1, collectionState) -> {
- if (collectionState == null) {
- return false;
- }
- List<Replica> reps = collectionState.getReplicas();
- for (Replica rep : reps) {
- if (liveNodes1.contains(rep.getNodeName()) && rep.getState() == Replica.State.ACTIVE) {
- return true;
- }
- } return false;
- });
- } catch (InterruptedException e) {
-
- } catch (TimeoutException e) {
-
- }
- }
- }
- } else {
- break;
- }
}
// Sort the leader replicas, if any, according to the request preferences (none if !sendToLeaders)
@@ -1220,13 +1192,13 @@
}
- protected DocCollection getDocCollection(String collection, Integer expectedVersion) throws SolrException {
+ protected DocCollection getDocCollection(String collection, Integer expectedVersion, Integer updateHash) throws SolrException {
if (expectedVersion == null) expectedVersion = -1;
if (collection == null) return null;
ExpiringCachedDocCollection cacheEntry = collectionStateCache.get(collection);
DocCollection col = cacheEntry == null ? null : cacheEntry.cached;
if (col != null) {
- if (expectedVersion <= col.getZNodeVersion()
+ if (expectedVersion <= col.getZNodeVersion() && (expectedVersion == -1 || updateHash == col.getStateUpdates().hashCode())
&& !cacheEntry.shouldRetry()) return col;
}
@@ -1235,23 +1207,23 @@
//no such collection exists
return null;
}
- if (!ref.isLazilyLoaded()) {
- //it is readily available just return it
- return ref.get();
- }
+// if (!ref.isLazilyLoaded()) {
+// //it is readily available just return it
+// return ref.get();
+// }
DocCollection fetchedCol = null;
/*we have waited for sometime just check once again*/
cacheEntry = collectionStateCache.get(collection);
col = cacheEntry == null ? null : cacheEntry.cached;
if (col != null) {
- if (expectedVersion <= col.getZNodeVersion() && !cacheEntry.shouldRetry()) return col;
+ if (expectedVersion <= col.getZNodeVersion() && (expectedVersion == -1 || updateHash == col.getStateUpdates().hashCode()) && !cacheEntry.shouldRetry()) return col;
}
// We are going to fetch a new version
// we MUST try to get a new version
- fetchedCol = ref.get();//this is a call to ZK
+ fetchedCol = ref.get(false);//this is a call to ZK
if (fetchedCol == null) return null;// this collection no more exists
- if (col != null && fetchedCol.getZNodeVersion() == col.getZNodeVersion()) {
+ if (col != null && fetchedCol.getZNodeVersion() == col.getZNodeVersion() && (expectedVersion == -1 || updateHash == fetchedCol.getStateUpdates().hashCode())) {
cacheEntry.setRetriedAt();//we retried and found that it is the same version
cacheEntry.maybeStale = false;
} else {
@@ -1299,7 +1271,7 @@
Map<String,Integer> results = new HashMap<String,Integer>();
if (resp instanceof RouteResponse) {
NamedList routes = ((RouteResponse)resp).getRouteResponses();
- DocCollection coll = getDocCollection(collection, null);
+ DocCollection coll = getDocCollection(collection, null, null);
Map<String,String> leaders = new HashMap<String,String>();
for (Slice slice : coll.getActiveSlices()) {
Replica leader = slice.getLeader();
@@ -1404,18 +1376,4 @@
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "shards" + " is a required param");
}
- public static int getTotalReplicas(ZkNodeProps zkProps) {
- int pullReplicas = zkProps.getInt(ZkStateReader.PULL_REPLICAS, 0);
- int tlogReplicas = zkProps.getInt(TLOG_REPLICAS, 0);
- int nrtReplicas = zkProps.getInt(NRT_REPLICAS, zkProps.getInt(REPLICATION_FACTOR, tlogReplicas > 0 ? 0 : 1));
-
- if (ZkStateReader.CREATE_NODE_SET_EMPTY.equals(zkProps.getStr(ZkStateReader.CREATE_NODE_SET))
- || "".equals(zkProps.getStr(ZkStateReader.CREATE_NODE_SET))) {
- nrtReplicas = 0;
- pullReplicas = 0;
- tlogReplicas = 0;
- }
- return pullReplicas + nrtReplicas + tlogReplicas;
- }
-
}
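
Note on the _stateVer_ handling above: the invalid-state entries coming back from the server now carry both the state.json version and a hash of the per-replica state updates, joined with '>'. A rough sketch of that format, using hypothetical names (StateVer, parse) purely for illustration; the real code just splits inline as shown in the hunk.

// Hedged sketch (not the actual Solr code): parsing a "version>updatesHash" pair defensively.
final class StateVer {
  final int version;
  final int updatesHash;

  private StateVer(int version, int updatesHash) {
    this.version = version;
    this.updatesHash = updatesHash;
  }

  // Splits "version>updatesHash"; rejects anything not in that form.
  static StateVer parse(String value) {
    String[] parts = value.split(">");
    if (parts.length != 2) {
      throw new IllegalArgumentException("expected version>updatesHash but got: " + value);
    }
    return new StateVer(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]));
  }
}
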
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudHttp2SolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudHttp2SolrClient.java
index e7e60ab..571e511 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudHttp2SolrClient.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudHttp2SolrClient.java
@@ -36,6 +36,7 @@
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.QoSParams;
+import org.apache.solr.common.util.CloseTracker;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.ObjectReleaseTracker;
import org.slf4j.Logger;
@@ -65,6 +66,8 @@
private Http2SolrClient myClient;
private final boolean clientIsInternal;
+ private CloseTracker closeTracker;
+
/**
* Create a new client object that connects to Zookeeper and is always aware
* of the SolrCloud state. If there is a fully redundant Zookeeper quorum and
@@ -75,6 +78,7 @@
*/
protected CloudHttp2SolrClient(Builder builder) {
super(builder.shardLeadersOnly, builder.parallelUpdates, builder.directUpdatesToLeadersOnly, false);
+ assert (closeTracker = new CloseTracker()) != null;
this.clientIsInternal = builder.httpClient == null;
if (builder.stateProvider == null) {
if (builder.zkHosts != null && builder.zkHosts.size() > 0 && builder.solrUrls != null && builder.solrUrls.size() > 0) {
@@ -176,16 +180,31 @@
}
}
+ public void enableCloseLock() {
+ if (closeTracker != null) {
+ closeTracker.enableCloseLock();
+ }
+ }
+
+ public void disableCloseLock() {
+ if (closeTracker != null) {
+ closeTracker.disableCloseLock();
+ }
+ }
+
@Override
public void close() throws IOException {
- try (ParWork closer = new ParWork(this, true, false)) {
+ super.close();
+
+ try (ParWork closer = new ParWork(this, true, true)) {
closer.collect(stateProvider);
closer.collect(lbClient);
if (clientIsInternal && myClient!=null) {
closer.collect(myClient);
}
}
- super.close();
+
+ assert closeTracker != null ? closeTracker.close() : true;
assert ObjectReleaseTracker.release(this);
}
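
The CloseTracker wiring above leans on the assert-allocation idiom: the tracker is only constructed when assertions are enabled, and the close-time check disappears entirely in production. A minimal sketch of the idiom with a stand-in Tracker class, just to show why both sides live inside assert statements.

// Sketch only. With -ea the assignment runs and the tracker is live; without -ea neither
// the allocation nor the close-time check costs anything.
class Tracked implements AutoCloseable {
  private Tracker tracker;

  Tracked() {
    assert (tracker = new Tracker()) != null; // only allocated when assertions are on
  }

  @Override
  public void close() {
    assert tracker == null || tracker.close(); // close() returns true so the assert passes
  }

  static final class Tracker {
    boolean close() {
      // a real tracker would record/verify the close here
      return true;
    }
  }
}
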
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/Http2SolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/Http2SolrClient.java
index 1020d22..8aa2055 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/Http2SolrClient.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/Http2SolrClient.java
@@ -159,8 +159,8 @@
*/
private volatile String serverBaseUrl;
private volatile boolean closeClient;
- private SolrQueuedThreadPool httpClientExecutor;
- private SolrScheduledExecutorScheduler scheduler;
+ private volatile SolrQueuedThreadPool httpClientExecutor;
+ private volatile SolrScheduledExecutorScheduler scheduler;
private volatile boolean closed;
protected Http2SolrClient(String serverBaseUrl, Builder builder) {
@@ -516,7 +516,7 @@
req.afterSend.run();
}
} catch (Exception e) {
-
+ log.debug("failed sending request", e);
if (e != CANCELLED_EXCEPTION) {
asyncListener.onFailure(e, 500);
}
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ZkClientClusterStateProvider.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ZkClientClusterStateProvider.java
index e4ce6e5..44c85c6 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ZkClientClusterStateProvider.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ZkClientClusterStateProvider.java
@@ -153,9 +153,6 @@
@Override
public void connect() {
- if (isClosed) {
- throw new AlreadyClosedException();
- }
if (this.zkStateReader == null) {
synchronized (this) {
if (this.zkStateReader == null) {
@@ -167,9 +164,6 @@
}
public ZkStateReader getZkStateReader() {
- if (isClosed) {
- throw new AlreadyClosedException();
- }
if (zkStateReader == null) {
synchronized (this) {
@@ -183,10 +177,6 @@
@Override
public void close() throws IOException {
- if (isClosed) {
- return;
- }
-
final ZkStateReader zkToClose = zkStateReader;
if (zkToClose != null && closeZkStateReader) {
IOUtils.closeQuietly(zkToClose);
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java
index d7d41d4..c109f14 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java
@@ -225,7 +225,6 @@
return null;
}
DocCollection docCollection = cs.getCollectionsMap().values().iterator().next();
- docCollection.setZnodeVersion(version);
return docCollection;
}
@@ -237,11 +236,11 @@
collections.put(collectionName, new CollectionRef(coll));
}
- return new ClusterState(collections, version);
+ return new ClusterState(collections, -1);
}
// TODO move to static DocCollection.loadFromMap
- private static DocCollection collectionFromObjects(Replica.NodeNameToBaseUrl zkStateReader, String name, Map<String, Object> objs, int version) {
+ private static DocCollection collectionFromObjects(Replica.NodeNameToBaseUrl zkStateReader, String name, Map<String, Object> objs, Integer version) {
Map<String,Object> props;
Map<String,Slice> slices;
@@ -377,11 +376,11 @@
this.coll = coll;
}
- /** Return the DocCollection, always refetching if lazy. Equivalent to get(false)
+ /** Return the DocCollection, using cached state if lazy.
* @return The collection state modeled in zookeeper
*/
public DocCollection get(){
- return get(false);
+ return get(true);
}
/** Return the DocCollection
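
CollectionRef.get() now prefers cached state by default (get(true)). A small sketch of the allowCached/TTL idea behind it, assuming a generic supplier rather than the actual ZooKeeper fetch; names here are stand-ins.

// Sketch: return the cached value while it is younger than a TTL, otherwise refetch.
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

final class CachedRef<T> {
  private final Supplier<T> fetcher;
  private final long ttlNanos;
  private volatile T cached;
  private volatile long lastUpdateNanos;

  CachedRef(Supplier<T> fetcher, long ttl, TimeUnit unit) {
    this.fetcher = fetcher;
    this.ttlNanos = unit.toNanos(ttl);
  }

  T get(boolean allowCached) {
    if (allowCached && cached != null && System.nanoTime() - lastUpdateNanos <= ttlNanos) {
      return cached; // fresh enough, skip the fetch
    }
    T fresh = fetcher.get();
    if (fresh != null) {
      cached = fresh;
      lastUpdateNanos = System.nanoTime();
    }
    return cached;
  }
}
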
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java b/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java
index 13ad78c..256413e 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java
@@ -27,6 +27,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiConsumer;
import java.util.function.BiPredicate;
@@ -51,7 +52,7 @@
public static final String DOC_ROUTER = "router";
public static final String SHARDS = "shards";
- private int znodeVersion;
+ private volatile int znodeVersion;
private final String name;
private final Map<String, Slice> slices;
@@ -63,13 +64,13 @@
private final Integer numPullReplicas;
private final Integer maxShardsPerNode;
private final Boolean readOnly;
- private volatile Map stateUpdates;
+ private volatile ConcurrentHashMap stateUpdates;
private final Long id;
private AtomicInteger sliceAssignCnt = new AtomicInteger();
public DocCollection(String name, Map<String, Slice> slices, Map<String, Object> props, DocRouter router) {
- this(name, slices, props, router, -1, null);
+ this(name, slices, props, router, 0, new ConcurrentHashMap());
}
/**
@@ -78,11 +79,16 @@
* @param props The properties of the slice. This is used directly and a copy is not made.
* @param zkVersion The version of the Collection node in Zookeeper (used for conditional updates).
*/
- public DocCollection(String name, Map<String, Slice> slices, Map<String, Object> props, DocRouter router, int zkVersion, Map stateUpdates) {
+ public DocCollection(String name, Map<String, Slice> slices, Map<String, Object> props, DocRouter router, int zkVersion, ConcurrentHashMap stateUpdates) {
super(props==null ? props = new HashMap<>() : props);
+
this.znodeVersion = zkVersion;
this.name = name;
- this.stateUpdates = stateUpdates;
+ if (stateUpdates == null) {
+ this.stateUpdates = new ConcurrentHashMap();
+ } else {
+ this.stateUpdates = stateUpdates;
+ }
this.slices = slices;
this.replicationFactor = (Integer) verifyProp(props, REPLICATION_FACTOR);
this.numNrtReplicas = (Integer) verifyProp(props, NRT_REPLICAS, 0);
@@ -217,11 +223,12 @@
* Get the list of all leaders hosted on the given node or <code>null</code> if none.
*/
public List<Replica> getLeaderReplicas(String nodeName) {
- Iterator<Map.Entry<String, Slice>> iter = slices.entrySet().iterator();
+ List<String> shuffleSlices = new ArrayList<>(slices.keySet());
+ Collections.shuffle(shuffleSlices);
List<Replica> leaders = new ArrayList<>(slices.size());
- while (iter.hasNext()) {
- Map.Entry<String, Slice> slice = iter.next();
- Replica leader = slice.getValue().getLeader();
+ for (String s : shuffleSlices) {
+ Slice slice = slices.get(s);
+ Replica leader = slice.getLeader();
if (leader != null && leader.getNodeName().equals(nodeName)) {
leaders.add(leader);
}
@@ -446,9 +453,9 @@
return stateUpdates != null;
}
- public void setStateUpdates(Map stateUpdates) {
- this.stateUpdates = stateUpdates;
- }
+// public void setStateUpdates(ConcurrentHashMap stateUpdates) {
+// this.stateUpdates = stateUpdates;
+// }
public void setSliceAssignCnt(int i) {
sliceAssignCnt.set(i);
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java b/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java
index 3f58db4..282f670 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/Replica.java
@@ -235,10 +235,13 @@
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
-
Replica replica = (Replica) o;
+ return name.equals(replica.name) && nodeName.equals(replica.nodeName);
+ }
- return name.equals(replica.name);
+ @Override
+ public int hashCode() {
+ return Objects.hash(name, nodeName);
}
/** Also known as coreNodeName. */
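
Replica.equals above now compares name plus nodeName, and a matching hashCode was added; once both fields participate in equality, the equals/hashCode contract requires they feed the hash as well, or hash-based collections misbehave. A minimal standalone sketch of that pairing.

// Sketch of the equals/hashCode pairing; ReplicaKey is a stand-in class.
import java.util.Objects;

final class ReplicaKey {
  final String name;
  final String nodeName;

  ReplicaKey(String name, String nodeName) {
    this.name = name;
    this.nodeName = nodeName;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (!(o instanceof ReplicaKey)) return false;
    ReplicaKey other = (ReplicaKey) o;
    return name.equals(other.name) && nodeName.equals(other.nodeName);
  }

  @Override
  public int hashCode() {
    return Objects.hash(name, nodeName); // same fields as equals
  }
}
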
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/Slice.java b/solr/solrj/src/java/org/apache/solr/common/cloud/Slice.java
index 1aaa259..3b76602 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/Slice.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/Slice.java
@@ -103,7 +103,7 @@
Object removed = replica.getProperties().remove("numShards");
}
- for (Replica replica : replicas.values()) {
+ for (Replica replica : currentSlice.getReplicas()) {
if (!replicas.containsKey(replica.getName())) {
replicas.put(replica.getName(), replica);
}
@@ -112,8 +112,7 @@
for (String removeReplica : remove) {
replicas.remove(removeReplica);
}
-// / propMap.remove("state");
- // currentSlice.propMap.putAll(propMap);
+
Slice newSlice = new Slice(currentSlice.name, replicas, currentSlice.propMap, currentSlice.collection, collectionId, nodeNameToBaseUrl);
newSlice.setLeader(currentSlice.getLeader());
return newSlice;
@@ -346,6 +345,7 @@
// only to be used by ZkStateWriter currently
public void setState(State state) {
+ propMap.put("state", state.toString());
this.state = state;
}
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java b/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
index 454e30c..0bde772 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
@@ -26,8 +26,6 @@
import org.apache.solr.common.util.CloseTracker;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.ObjectReleaseTracker;
-import org.apache.solr.common.util.TimeOut;
-import org.apache.solr.common.util.TimeSource;
import org.apache.zookeeper.AddWatchMode;
import org.apache.zookeeper.AsyncCallback;
import org.apache.zookeeper.CreateMode;
@@ -1050,7 +1048,8 @@
}
if ((stat != null && stat.getDataLength() < maxBytesBeforeSuppress && lines < 4) || path.endsWith("state.json") || path
- .endsWith("security.json") || (path.endsWith("solrconfig.xml") && Boolean.getBoolean("solr.tests.printsolrconfig")) || path.endsWith("_statupdates") || path.contains("/terms/")) {
+ .endsWith("security.json") || (path.endsWith("solrconfig.xml") && Boolean.getBoolean("solr.tests.printsolrconfig")) || path.endsWith("_statupdates")
+ || path.contains("/terms/") || path.endsWith("leader")) {
// if (path.endsWith(".xml")) {
// // this is the cluster state in xml format - lets pretty print
// dataString = prettyPrint(path, dataString);
@@ -1171,14 +1170,6 @@
return zk == null || !zk.getState().isAlive();
}
- public SolrZooKeeper getSolrZooKeeper() {
-
-// if (connManager.getKeeper() == null) {
-// throw new AlreadyClosedException("No ZooKeeper object");
-// }
- return (SolrZooKeeper) connManager.getKeeper();
- }
-
/**
* Validates if zkHost contains a chroot. See http://zookeeper.apache.org/doc/r3.2.2/zookeeperProgrammers.html#ch_zkSessions
*/
@@ -1334,23 +1325,35 @@
}
public void addWatch(String basePath, Watcher watcher, AddWatchMode mode) throws KeeperException, InterruptedException {
- getSolrZooKeeper().addWatch(basePath, watcher == null ? null : wrapWatcher(watcher), mode);
+ addWatch(basePath, watcher, mode, false);
+ }
+
+ public void addWatch(String basePath, Watcher watcher, AddWatchMode mode, boolean retryOnConnLoss) throws KeeperException, InterruptedException {
+ if (retryOnConnLoss) {
+ ZkCmdExecutor.retryOperation(zkCmdExecutor, () -> {
+ connManager.getKeeper().addWatch(basePath, watcher == null ? null : wrapWatcher(watcher), mode);
+ return null;
+ }, false);
+ } else {
+ connManager.getKeeper().addWatch(basePath, watcher == null ? null : wrapWatcher(watcher), mode);
+ }
+
}
public void addWatch(String basePath, Watcher watcher, AddWatchMode mode, AsyncCallback.VoidCallback cb, Object ctx) {
- getSolrZooKeeper().addWatch(basePath, watcher == null ? null : wrapWatcher(watcher), mode, cb, ctx);
+ connManager.getKeeper().addWatch(basePath, watcher == null ? null : wrapWatcher(watcher), mode, cb, ctx);
}
public void removeWatches(String path, Watcher watcher, Watcher.WatcherType watcherType, boolean local, AsyncCallback.VoidCallback cb, Object ctx) {
- getSolrZooKeeper().removeWatches(path, watcher, watcherType, local, cb, ctx);
+ connManager.getKeeper().removeWatches(path, watcher, watcherType, local, cb, ctx);
}
public void removeWatches(String path, Watcher watcher, Watcher.WatcherType watcherType, boolean local) throws KeeperException, InterruptedException {
- getSolrZooKeeper().removeWatches(path, watcher, watcherType, local);
+ connManager.getKeeper().removeWatches(path, watcher, watcherType, local);
}
public long getSessionId() {
- return getSolrZooKeeper().getSessionId();
+ return connManager.getKeeper().getSessionId();
}
/**
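
addWatch gains a retryOnConnLoss variant above. A rough sketch of what retry-on-connection-loss can look like against the plain ZooKeeper 3.6+ addWatch API; this is not Solr's ZkCmdExecutor, just the shape of the loop, with maxAttempts and the backoff as assumed parameters.

// Sketch only: bounded retry of addWatch on connection loss.
import org.apache.zookeeper.AddWatchMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;

final class WatchUtil {
  static void addWatchWithRetry(ZooKeeper zk, String path, Watcher watcher, AddWatchMode mode,
                                int maxAttempts) throws KeeperException, InterruptedException {
    for (int attempt = 1; ; attempt++) {
      try {
        zk.addWatch(path, watcher, mode);
        return;
      } catch (KeeperException.ConnectionLossException e) {
        if (attempt >= maxAttempts) throw e; // give up after a bounded number of tries
        Thread.sleep(250L * attempt);        // simple backoff before retrying
      }
    }
  }
}
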
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
index 256859d..32a695b 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
@@ -245,6 +245,12 @@
private volatile String node = null;
private volatile LiveNodeWatcher liveNodesWatcher;
private volatile CollectionsChildWatcher collectionsChildWatcher;
+ private volatile IsLocalLeader isLocalLeader;
+
+ @Override
+ protected void finalize() throws Throwable {
+ super.finalize();
+ }
public static interface CollectionRemoved {
void removed(String collection);
@@ -392,7 +398,13 @@
// Need a copy so we don't delete from what we're iterating over.
watchedCollectionStates.forEach((name, coll) -> {
DocCollection newState = null;
- ReentrantLock collectionStateLock = collectionStateLocks.get(coll);
+
+ if (!collectionStateLocks.containsKey(name)) {
+ ReentrantLock collectionStateLock = new ReentrantLock(true);
+ ReentrantLock oldLock = collectionStateLocks.putIfAbsent(name, collectionStateLock);
+ }
+
+ ReentrantLock collectionStateLock = collectionStateLocks.get(name);
collectionStateLock.lock();
try {
try {
@@ -404,7 +416,7 @@
String stateUpdatesPath = ZkStateReader.getCollectionStateUpdatesPath(name);
try {
- getAndProcessStateUpdates(name, stateUpdatesPath, newState, true);
+ newState = getAndProcessStateUpdates(name, stateUpdatesPath, newState, true);
} catch (Exception e) {
log.error("Error fetching state updates", e);
}
@@ -424,6 +436,17 @@
}
+ public void enableCloseLock() {
+ if (closeTracker != null) {
+ closeTracker.enableCloseLock();
+ }
+ }
+
+ public void disableCloseLock() {
+ if (closeTracker != null) {
+ closeTracker.disableCloseLock();
+ }
+ }
public void forciblyRefreshClusterStateSlow(String name) {
try {
@@ -433,17 +456,29 @@
Set<DocCollection> updatedCollections = new HashSet<>();
- DocCollection newState = fetchCollectionState(name);
-
- String stateUpdatesPath = ZkStateReader.getCollectionStateUpdatesPath(name);
- try {
- getAndProcessStateUpdates(name, stateUpdatesPath, newState, true);
- } catch (Exception e) {
- log.error("Error fetching state updates", e);
+ if (!collectionStateLocks.containsKey(name)) {
+ ReentrantLock collectionStateLock = new ReentrantLock(true);
+ ReentrantLock oldLock = collectionStateLocks.putIfAbsent(name, collectionStateLock);
}
- if (updateWatchedCollection(name, newState == null ? null : new ClusterState.CollectionRef(newState))) {
- updatedCollections.add(newState);
+ ReentrantLock collectionStateLock = collectionStateLocks.get(name);
+ collectionStateLock.lock();
+ try {
+
+ DocCollection newState = fetchCollectionState(name);
+
+ String stateUpdatesPath = ZkStateReader.getCollectionStateUpdatesPath(name);
+ try {
+ newState = getAndProcessStateUpdates(name, stateUpdatesPath, newState, true);
+ } catch (Exception e) {
+ log.error("Error fetching state updates", e);
+ }
+
+ if (updateWatchedCollection(name, newState == null ? null : new ClusterState.CollectionRef(newState))) {
+ updatedCollections.add(newState);
+ }
+ } finally {
+ collectionStateLock.unlock();
}
} catch (KeeperException e) {
@@ -466,25 +501,15 @@
log.debug("compareStateVersions {} {} {}", coll, version, updateHash);
DocCollection collection = getCollectionOrNull(coll);
if (collection == null) return null;
- if (collection.getZNodeVersion() != version || (collection.getZNodeVersion() == version && collection.hasStateUpdates() && updateHash != collection.getStateUpdates().hashCode())) {
- if (log.isDebugEnabled()) {
- log.debug("Server older than client {}<{}", collection.getZNodeVersion(), version);
- }
- DocCollection nu = getCollectionLive(coll);
- log.debug("got collection {} {} {}", nu);
- if (nu == null) return -3;
-
- constructState(nu, "compareStateVersions");
- }
-
- if (collection.getZNodeVersion() == version && (!collection.hasStateUpdates() || updateHash == collection.getStateUpdates().hashCode())) {
+ if (collection.getZNodeVersion() == version && updateHash == collection.getStateUpdates().hashCode()) {
return null;
}
if (log.isDebugEnabled()) {
- log.debug("Wrong version from client [{}]!=[{}]", version, collection.getZNodeVersion());
+ log.debug("Wrong version from client [{}]!=[{}] updatesHash {}!={}", version, collection.getZNodeVersion(), updateHash, collection.getStateUpdates().hashCode());
}
+ // TODO: return state update hash as well and use this? Right now it's just a signal to get both on return
return collection.getZNodeVersion();
}
@@ -639,7 +664,7 @@
*/
private void constructState(DocCollection collection, String caller) {
- log.trace("construct new cluster state on structure change {} {}", caller, collection);
+ log.debug("construct new cluster state on structure change {} {} {}", caller, collection, liveNodes);
log.trace("clusterStateSet: interesting [{}] watched [{}] lazy [{}] total [{}]", collectionWatches.keySet(), watchedCollectionStates.keySet(), lazyCollectionStates.keySet(),
clusterState.keySet());
@@ -680,14 +705,16 @@
LazyCollectionRef old = lazyCollectionStates.putIfAbsent(coll, docRef);
if (old == null) {
clusterState.put(coll, docRef);
- ReentrantLock collectionStateLock = new ReentrantLock(true);
- ReentrantLock oldLock = collectionStateLocks.putIfAbsent(coll, collectionStateLock);
log.debug("Created lazy collection {} interesting [{}] watched [{}] lazy [{}] total [{}]", coll, collectionWatches.keySet().size(),
watchedCollectionStates.keySet().size(), lazyCollectionStates.keySet().size(), clusterState.size());
}
}
}
+ if (!collectionStateLocks.containsKey(coll)) {
+ ReentrantLock collectionStateLock = new ReentrantLock(true);
+ ReentrantLock oldLock = collectionStateLocks.putIfAbsent(coll, collectionStateLock);
+ }
}
List<String> finalChildren = children;
@@ -785,35 +812,21 @@
public DocCollection get(boolean allowCached) {
gets.incrementAndGet();
if (!allowCached || lastUpdateTime < 0 || System.nanoTime() - lastUpdateTime > LAZY_CACHE_TIME) {
- boolean shouldFetch = true;
- if (cachedDocCollection != null) {
- Stat exists = null;
- try {
- exists = zkClient.exists(getCollectionPath(collName), null, true);
- } catch (Exception e) {
- log.warn("Exception on state.json exists", e);
- shouldFetch = false;
+ try {
+ DocCollection cdc = getCollectionLive(collName);
+ if (cdc != null) {
+ lastUpdateTime = System.nanoTime();
+ cachedDocCollection = cdc;
+ return cdc;
}
- if (exists != null && exists.getVersion() == cachedDocCollection.getZNodeVersion()) {
- shouldFetch = false;
- }
- }
- if (shouldFetch) {
- try {
- DocCollection cdc = getCollectionLive(collName);
- if (cdc != null) {
- lastUpdateTime = System.nanoTime();
- cachedDocCollection = cdc;
- return cdc;
- }
- } catch (AlreadyClosedException e) {
- return cachedDocCollection;
- } catch (Exception e) {
- throw new SolrException(ErrorCode.SERVER_ERROR, e);
- }
+ } catch (AlreadyClosedException e) {
+ return cachedDocCollection;
+ } catch (Exception e) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
}
}
+
return cachedDocCollection;
}
@@ -979,6 +992,15 @@
this.node = node;
}
+ public void setLeaderChecker(IsLocalLeader isLocalLeader) {
+ this.isLocalLeader = isLocalLeader;
+ }
+
+ public interface IsLocalLeader {
+ boolean isLocalLeader(String name);
+ }
+
+
/**
* Get shard leader properties, with retry if none exist.
*/
@@ -990,15 +1012,19 @@
return getLeaderRetry(collection, shard, timeout, false);
}
+ public Replica getLeaderRetry(String collection, String shard, int timeout, boolean checkValidLeader) throws InterruptedException, TimeoutException {
+ return getLeaderRetry(null, collection, shard, timeout, checkValidLeader);
+ }
/**
* Get shard leader properties, with retry if none exist.
*/
- public Replica getLeaderRetry(String collection, String shard, int timeout, boolean checkValidLeader) throws InterruptedException, TimeoutException {
+ public Replica getLeaderRetry(Http2SolrClient httpClient, String collection, String shard, int timeout, boolean checkValidLeader) throws InterruptedException, TimeoutException {
+ log.debug("get leader timeout={}", timeout);
AtomicReference<Replica> returnLeader = new AtomicReference<>();
DocCollection coll;
int readTimeout = Integer.parseInt(System.getProperty("prepRecoveryReadTimeoutExtraWait", "7000"));
TimeOut leaderVerifyTimeout = new TimeOut(timeout, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
- while (true) {
+ while (!closed) {
try {
waitForState(collection, timeout, TimeUnit.MILLISECONDS, (n, c) -> {
@@ -1010,7 +1036,7 @@
if (leader.getState() != Replica.State.ACTIVE) {
return false;
}
-
+ log.debug("Found ACTIVE leader for slice={} leader={}", slice.getName(), leader);
returnLeader.set(leader);
return true;
}
@@ -1019,32 +1045,44 @@
});
} catch (TimeoutException e) {
coll = getCollectionOrNull(collection);
+ log.debug("timeout out while waiting to see leader in cluster state {} {}", shard, coll);
throw new TimeoutException(
- "No registered leader was found after waiting for " + timeout + "ms " + ", collection: " + collection + " slice: " + shard + " saw state=" + clusterState.get(collection)
+ "No registered leader was found after waiting for " + timeout + "ms " + ", collection: " + collection + " slice: " + shard + " saw state=" + coll
+ " with live_nodes=" + liveNodes);
}
Replica leader = returnLeader.get();
- if (checkValidLeader) {
- try (Http2SolrClient client = new Http2SolrClient.Builder("").idleTimeout(readTimeout).markInternalRequest().build()) {
- CoreAdminRequest.WaitForState prepCmd = new CoreAdminRequest.WaitForState();
- prepCmd.setCoreName(leader.getName());
- prepCmd.setLeaderName(leader.getName());
- prepCmd.setCollection(leader.getCollection());
- prepCmd.setShardId(leader.getSlice());
-
- prepCmd.setBasePath(leader.getBaseUrl());
-
- try {
- NamedList<Object> result = client.request(prepCmd);
+ if (checkValidLeader && leader != null) {
+ log.info("checking if found leader is valid {}",leader);
+ if (node != null && isLocalLeader != null && leader.getNodeName().equals(node)) {
+ if (isLocalLeader.isLocalLeader(leader.getName())) {
break;
- } catch (Exception e) {
- log.info("failed checking for leader {} {}", leader.getName(), e.getMessage());
+ }
+ } else {
+
+ try (Http2SolrClient client = new Http2SolrClient.Builder(leader.getBaseUrl()).idleTimeout(readTimeout).withHttpClient(httpClient).markInternalRequest().build()) {
+ CoreAdminRequest.WaitForState prepCmd = new CoreAdminRequest.WaitForState();
+ prepCmd.setCoreName(leader.getName());
+ prepCmd.setLeaderName(leader.getName());
+ prepCmd.setCollection(leader.getCollection());
+ prepCmd.setShardId(leader.getSlice());
+ prepCmd.setBasePath(leader.getBaseUrl());
+ try {
+ NamedList<Object> result = client.request(prepCmd);
+ break;
+ } catch (RejectedExecutionException | AlreadyClosedException e) {
+ log.warn("Rejected or already closed, bailing {} {}", leader.getName(), e.getClass().getSimpleName());
+ throw e;
+ } catch (Exception e) {
+ log.info("failed checking for leader {} {}", leader.getName(), e.getMessage());
+ Thread.sleep(50);
+ }
}
}
if (leaderVerifyTimeout.hasTimedOut()) {
- throw new SolrException(ErrorCode.SERVER_ERROR,
- "No registered leader was found " + "collection: " + collection + " slice: " + shard + " saw state=" + clusterState.get(collection) + " with live_nodes=" + liveNodes);
+ log.debug("timeout out while checking if found leader is valid {}", leader);
+ throw new TimeoutException("No registered leader was found " + "collection: " + collection + " slice: " + shard + " saw state=" + clusterState.get(collection) +
+ " with live_nodes=" + liveNodes);
}
} else {
@@ -1052,8 +1090,9 @@
}
}
if (returnLeader.get() == null) {
- throw new SolrException(ErrorCode.SERVER_ERROR,
- "No registered leader was found " + "collection: " + collection + " slice: " + shard + " saw state=" + clusterState.get(collection) + " with live_nodes=" + liveNodes);
+ log.debug("return leader is null");
+ throw new TimeoutException("No registered leader was found " + "collection: " + collection + " slice: " + shard + " saw state=" + clusterState.get(collection) +
+ " with live_nodes=" + liveNodes);
}
return returnLeader.get();
}
@@ -1109,11 +1148,14 @@
public List<Replica> getReplicaProps(String collection, String shardId, String thisCoreNodeName,
Replica.State mustMatchStateFilter, Replica.State mustMatchStateFilter2) {
//TODO: We don't need all these getReplicaProps method overloading. Also, it's odd that the default is to return replicas of type TLOG and NRT only
- return getReplicaProps(collection, shardId, thisCoreNodeName, mustMatchStateFilter, mustMatchStateFilter2, EnumSet.of(Replica.Type.TLOG, Replica.Type.NRT));
+ Set<Replica.State> matchFilters = new HashSet<>(2);
+ matchFilters.add(mustMatchStateFilter);
+ matchFilters.add(mustMatchStateFilter2);
+ return getReplicaProps(collection, shardId, thisCoreNodeName, matchFilters, EnumSet.of(Replica.Type.TLOG, Replica.Type.NRT));
}
public List<Replica> getReplicaProps(String collection, String shardId, String thisCoreNodeName,
- Replica.State mustMatchStateFilter, Replica.State mustMatchStateFilter2, final EnumSet<Replica.Type> acceptReplicaType) {
+ Collection<Replica.State> matchStateFilters, final EnumSet<Replica.Type> acceptReplicaType) {
assert thisCoreNodeName != null;
ClusterState.CollectionRef docCollectionRef = clusterState.get(collection);
@@ -1139,7 +1181,7 @@
String coreNodeName = entry.getValue().getName();
if (liveNodes.contains(nodeProps.getNodeName()) && !coreNodeName.equals(thisCoreNodeName)) {
- if (mustMatchStateFilter == null || (mustMatchStateFilter == nodeProps.getState() || mustMatchStateFilter2 == nodeProps.getState())) {
+ if (matchStateFilters == null || matchStateFilters.size() == 0 || matchStateFilters.contains(nodeProps.getState())) {
nodes.add(nodeProps);
}
}
@@ -1269,7 +1311,7 @@
boolean haveUnexpiredProps = vprops != null && vprops.cacheUntilNs > System.nanoTime();
long untilNs = System.nanoTime() + TimeUnit.NANOSECONDS.convert(cacheForMillis, TimeUnit.MILLISECONDS);
Map<String,String> properties;
- if (haveUnexpiredProps) {
+ if (haveUnexpiredProps || (vprops != null && !zkClient.isConnected())) {
properties = vprops.props;
vprops.cacheUntilNs = Math.max(vprops.cacheUntilNs, untilNs);
} else {
@@ -1436,13 +1478,13 @@
public void createWatch() {
String collectionCSNPath = getCollectionSCNPath(coll);
try {
- zkClient.addWatch(collectionCSNPath, this, AddWatchMode.PERSISTENT);
+ zkClient.addWatch(collectionCSNPath, this, AddWatchMode.PERSISTENT, true);
} catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR, e);
}
try {
- zkClient.addWatch(stateUpdateWatcher.stateUpdatesPath, stateUpdateWatcher, AddWatchMode.PERSISTENT);
+ zkClient.addWatch(stateUpdateWatcher.stateUpdatesPath, stateUpdateWatcher, AddWatchMode.PERSISTENT, true);
} catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR, e);
}
@@ -1468,19 +1510,6 @@
}
}
- public void refreshStateUpdates() {
- if (log.isDebugEnabled()) log.debug("fetch additional state updates {}", coll);
- ReentrantLock collectionStateLock = collectionStateLocks.get(coll);
- try {
- collectionStateLock.lock();
- getAndProcessStateUpdates(coll, stateUpdateWatcher.stateUpdatesPath, getCollectionOrNull(coll), false);
- } catch (Exception e) {
- log.error("Unwatched collection: [{}]", coll, e);
- } finally {
- collectionStateLock.unlock();
- }
- }
-
@Override
public void close() throws IOException {
this.closed = true;
@@ -1546,7 +1575,7 @@
if (EventType.None.equals(event.getType())) {
return;
}
- if (closed) return;
+
if (node != null) {
MDCLoggingContext.setNode(node);
}
@@ -1646,13 +1675,13 @@
log.error("An error has occurred", e);
return;
}
-
- constructState(null, "collection child watcher");
}
public void refresh() {
try {
refreshCollectionList();
+
+ constructState(null, "collection child watcher");
} catch (AlreadyClosedException e) {
} catch (KeeperException e) {
@@ -1755,7 +1784,6 @@
newState = fetchCollectionState(coll);
String stateUpdatesPath = ZkStateReader.getCollectionStateUpdatesPath(coll);
newState = getAndProcessStateUpdates(coll, stateUpdatesPath, newState, true);
- // constructState(newState, "getCollectionLive");
} catch (KeeperException e) {
log.warn("Zookeeper error getting latest collection state for collection={}", coll, e);
return null;
@@ -1771,7 +1799,7 @@
private DocCollection getAndProcessStateUpdates(String coll, String stateUpdatesPath, DocCollection docCollection, boolean live) throws KeeperException, InterruptedException {
try {
- log.trace("get and process state updates for {}", coll);
+ log.debug("get and process state updates for {}", coll);
Stat stat;
try {
@@ -1785,9 +1813,9 @@
}
if (docCollection != null && docCollection.hasStateUpdates()) {
- int oldVersion = (int) docCollection.getStateUpdates().get("_ver_");
- if (stat.getVersion() < oldVersion) {
- if (log.isDebugEnabled()) log.debug("Will not apply state updates, they are for an older set of updates {}, ours is now {}", stat.getVersion(), oldVersion);
+ Integer oldVersion = (Integer) docCollection.getStateUpdates().get("_ver_");
+ if (oldVersion != null && stat.getVersion() < oldVersion) {
+ if (log.isDebugEnabled()) log.debug("Will not apply state updates based on updates znode, they are for an older set of updates {}, ours is now {}", stat.getVersion(), oldVersion);
return docCollection;
}
}
@@ -1813,6 +1841,8 @@
return docCollection;
}
+ m = new ConcurrentHashMap<>(m);
+
Integer version = Integer.parseInt((String) m.get("_cs_ver_"));
log.trace("Got additional state updates with znode version {} for cs version {} updates={}", stat.getVersion(), version, m);
@@ -1822,14 +1852,14 @@
Set<Entry<String,Object>> entrySet = m.entrySet();
if (docCollection != null) {
- if (version < docCollection.getZNodeVersion()) {
- if (log.isDebugEnabled()) log.debug("Will not apply state updates, they are for an older state.json {}, ours is now {}", version, docCollection.getZNodeVersion());
+ if (version > 0 && version < docCollection.getZNodeVersion()) {
+ if (log.isDebugEnabled()) log.debug("Will not apply state updates based on state.json znode, they are for an older state.json {}, ours is now {}", version, docCollection.getZNodeVersion());
return docCollection;
}
if (docCollection.hasStateUpdates()) {
- int oldVersion = (int) docCollection.getStateUpdates().get("_ver_");
- if (stat.getVersion() < oldVersion) {
+ Integer oldVersion = (Integer) docCollection.getStateUpdates().get("_ver_");
+ if (oldVersion != null && stat.getVersion() < oldVersion) {
if (log.isDebugEnabled()) log.debug("Will not apply state updates, they are for an older set of updates {}, ours is now {}", stat.getVersion(), oldVersion);
return docCollection;
}
@@ -1845,7 +1875,7 @@
}
Replica replica = docCollection.getReplicaById(docCollection.getId() + "-" + id);
- log.trace("Got additional state update {} replica={} id={} ids={} {}", state == null ? "leader" : state, replica.getName(), id, docCollection.getReplicaByIds());
+ log.trace("Got additional state update {} replica={} id={} ids={} {}", state == null ? "leader" : state, replica == null ? null : replica.getName(), id, docCollection.getReplicaByIds());
if (replica != null) {
@@ -1893,7 +1923,7 @@
log.trace("add new slice leader={} {}", newSlice.getLeader(), newSlice);
- DocCollection newDocCollection = new DocCollection(coll, newSlices, docCollection.getProperties(), docCollection.getRouter(), version, m);
+ DocCollection newDocCollection = new DocCollection(coll, newSlices, docCollection.getProperties(), docCollection.getRouter(), docCollection.getZNodeVersion(), (ConcurrentHashMap) m);
docCollection = newDocCollection;
} else {
@@ -1917,7 +1947,7 @@
}
} catch (Exception e) {
- log.error("exeption trying to process additional updates", e);
+ log.error("Exception trying to process additional updates", e);
}
return docCollection == null ? docCollection : docCollection;
@@ -1948,11 +1978,11 @@
if (docCollection != null) {
int localVersion = docCollection.getZNodeVersion();
if (log.isDebugEnabled()) log.debug("found version {}, our local version is {}, has updates {}", version, localVersion, docCollection.hasStateUpdates());
- if (docCollection.hasStateUpdates()) {
- if (localVersion > version) {
- return docCollection;
- }
+
+ if (localVersion >= version) {
+ return docCollection;
}
+
}
// if (lazyCollectionStates.containsKey(coll)) {
@@ -2168,7 +2198,8 @@
CollectionStateWatcher sw = watchSet.get();
if (sw != null) {
sw.refresh();
- constructState(null, "registerDocCollectionWatcher");
+ } else {
+ constructState(getCollectionOrNull(collection), "registerDocCollectionWatcher");
}
}
@@ -2209,11 +2240,15 @@
*/
public void waitForState(final String collection, long wait, TimeUnit unit, CollectionStatePredicate predicate)
throws InterruptedException, TimeoutException {
+// NOTE: you cannot shortcut like this - if a zkstatereader has this collection as lazy and we do a waitForState, doing
+ // this kind of shortcut will not give you a tmp watched collection that ensures new collection state notification, and since
+ // we often verify a valid leader after seeing the right state, we will want further state updates until we get the right
+ // state and a valid leader(SyncSliceTest and some others are good at catching this)
- DocCollection coll = getCollectionOrNull(collection);
- if (predicate.matches(getLiveNodes(), coll)) {
- return;
- }
+// DocCollection coll = getCollectionOrNull(collection);
+// if (predicate.matches(getLiveNodes(), coll)) {
+// return;
+// }
final CountDownLatch latch = new CountDownLatch(1);
AtomicReference<DocCollection> docCollection = new AtomicReference<>();
org.apache.solr.common.cloud.CollectionStateWatcher watcher = new PredicateMatcher(predicate, latch, docCollection).invoke();
@@ -2229,40 +2264,132 @@
}
}
- public void waitForActiveCollection(String collection, long wait, TimeUnit unit, int shards, int totalReplicas) {
+ public void waitForActiveCollection(String collection, long wait, TimeUnit unit, int shards, int totalReplicas) throws TimeoutException {
waitForActiveCollection(collection, wait, unit, shards, totalReplicas, false);
}
- public void waitForActiveCollection(String collection, long wait, TimeUnit unit, int shards, int totalReplicas, boolean exact) {
- waitForActiveCollection(collection, wait, unit, false, shards, totalReplicas, true);
+ public void waitForActiveCollection(String collection, long wait, TimeUnit unit, int shards, int totalReplicas, boolean exact) throws TimeoutException {
+ waitForActiveCollection(collection, wait, unit, false, shards, totalReplicas, exact, false);
}
- public void waitForActiveCollection(String collection, long wait, TimeUnit unit, boolean justLeaders, int shards, int totalReplicas, boolean exact) {
+ public void waitForActiveCollection(String collection, long wait, TimeUnit unit, boolean justLeaders, int shards, int totalReplicas, boolean exact, boolean checkValidLeaders)
+ throws TimeoutException {
+ waitForActiveCollection(null, collection, wait, unit, justLeaders, shards, totalReplicas, exact, checkValidLeaders);
+ }
+
+ public void waitForActiveCollection(Http2SolrClient client, String collection, long wait, TimeUnit unit, boolean justLeaders, int shards, int totalReplicas, boolean exact, boolean checkValidLeaders)
+ throws TimeoutException {
log.debug("waitForActiveCollection: {} interesting [{}] watched [{}] lazy [{}] total [{}]", collection, collectionWatches.keySet().size(), watchedCollectionStates.keySet().size(), lazyCollectionStates.keySet().size(),
clusterState.size());
assert collection != null;
- CollectionStatePredicate predicate = expectedShardsAndActiveReplicas(justLeaders, shards, totalReplicas, exact);
+ TimeOut leaderVerifyTimeout = new TimeOut(wait, unit, TimeSource.NANO_TIME);
+ while (!closed) {
+ CollectionStatePredicate predicate = expectedShardsAndActiveReplicas(justLeaders, shards, totalReplicas, exact);
- AtomicReference<DocCollection> state = new AtomicReference<>();
- AtomicReference<Set<String>> liveNodesLastSeen = new AtomicReference<>();
- try {
- waitForState(collection, wait, unit, (n, c) -> {
- state.set(c);
- liveNodesLastSeen.set(n);
+ AtomicReference<DocCollection> state = new AtomicReference<>();
+ AtomicReference<Set<String>> liveNodesLastSeen = new AtomicReference<>();
+ try {
+ waitForState(collection, wait, unit, (n, c) -> {
+ state.set(c);
+ liveNodesLastSeen.set(n);
- return predicate.matches(n, c);
- });
- } catch (TimeoutException e) {
- throw new RuntimeException("Failed while waiting for active collection" + "\n" + e.getMessage() + " \nShards:" + shards + " Replicas:" + totalReplicas + "\nLive Nodes: " + Arrays.toString(liveNodesLastSeen.get().toArray())
- + "\nLast available state: " + state.get());
- } catch (InterruptedException e) {
- ParWork.propagateInterrupt(e);
- throw new RuntimeException("", e);
+ return predicate.matches(n, c);
+ });
+ } catch (TimeoutException e) {
+ throw new TimeoutException("Failed while waiting for active collection" + "\n" + e.getMessage() + " \nShards:" + shards + " Replicas:" + totalReplicas + "\nLive Nodes: " + Arrays
+ .toString(liveNodesLastSeen.get().toArray()) + "\nLast available state: " + state.get());
+ } catch (InterruptedException e) {
+ ParWork.propagateInterrupt(e);
+ throw new RuntimeException("", e);
+ }
+
+ if (checkValidLeaders) {
+ Boolean success;
+
+ try (Http2SolrClient httpClient = new Http2SolrClient.Builder("").idleTimeout(5000).withHttpClient(client).markInternalRequest().build()) {
+ success = checkLeaders(collection, shards, httpClient);
+ }
+
+ if (success == null || !success) {
+ log.info("Failed confirming all shards have valid leaders");
+ } else {
+ log.info("done checking valid leaders on active collection success={}", success);
+ break;
+ }
+ if (leaderVerifyTimeout.hasTimedOut()) {
+ throw new SolrException(ErrorCode.SERVER_ERROR,
+ "No registered leader was found " + "collection: " + collection + " saw state=" + clusterState.get(collection) + " with live_nodes=" + liveNodes);
+ }
+
+ } else {
+ break;
+ }
}
}
+ private Boolean checkLeaders(String collection, int shards, Http2SolrClient client) {
+ DocCollection coll = getCollectionOrNull(collection);
+ if (coll == null) {
+ return null;
+ }
+
+ Collection<Slice> slices = coll.getSlices();
+ boolean success = true;
+ int validCnt = 0;
+ for (Slice slice : slices) {
+ Replica leader = slice.getLeader();
+ if (leader != null) {
+
+ if (node != null && isLocalLeader != null && leader.getNodeName().equals(node)) {
+ if (!isLocalLeader.isLocalLeader(leader.getName())) {
+ log.info("failed checking for local leader {} {}", leader.getName());
+ success = false;
+ try {
+ Thread.sleep(50);
+ } catch (InterruptedException interruptedException) {
+ ParWork.propagateInterrupt(interruptedException);
+ }
+ break;
+ }
+ } else {
+ CoreAdminRequest.WaitForState prepCmd = new CoreAdminRequest.WaitForState();
+ prepCmd.setCoreName(leader.getName());
+ prepCmd.setLeaderName(leader.getName());
+ prepCmd.setCollection(leader.getCollection());
+ prepCmd.setShardId(leader.getSlice());
+
+ prepCmd.setBasePath(leader.getBaseUrl());
+
+ try {
+ NamedList<Object> result = client.request(prepCmd);
+ log.info("Leader looks valid {}", leader);
+ validCnt++;
+ } catch (RejectedExecutionException | AlreadyClosedException e) {
+ log.warn("Rejected or already closed, bailing {} {}", leader.getName(), e.getClass().getSimpleName());
+ throw e;
+ } catch (Exception e) {
+ log.info("failed checking for leader {} {}", leader.getName(), e.getMessage());
+ success = false;
+ try {
+ Thread.sleep(50);
+ } catch (InterruptedException interruptedException) {
+ ParWork.propagateInterrupt(interruptedException);
+ }
+ break;
+ }
+ }
+ } else {
+ success = false;
+ }
+ }
+ if (validCnt != shards) {
+ return false;
+ }
+ return success;
+ }
+
/**
* Block until a LiveNodesStatePredicate returns true, or the wait times out
* <p>
@@ -2320,8 +2447,8 @@
final DocCollectionAndLiveNodesWatcherWrapper wrapper
= new DocCollectionAndLiveNodesWatcherWrapper(collection, watcher);
- removeDocCollectionWatcher(collection, wrapper);
removeLiveNodesListener(wrapper);
+ removeDocCollectionWatcher(collection, wrapper);
}
/**
@@ -2387,7 +2514,7 @@
// returns true if the state has changed
private boolean updateWatchedCollection(String coll, ClusterState.CollectionRef newState) {
- log.trace("updateWatchedCollection for [{}] [{}]", coll, newState);
+ log.debug("updateWatchedCollection for [{}] [{}]", coll, newState);
try {
if (newState == null) {
if (log.isDebugEnabled()) log.debug("Removing cached collection state for [{}]", coll);
@@ -2432,7 +2559,7 @@
return newState;
}
- if (docCollRef.isLazilyLoaded()) {
+ if (docCollRef.isLazilyLoaded()) { // should not happen
if (watchedCollectionStates.containsKey(coll)) {
update.set(true);
LazyCollectionRef prev = lazyCollectionStates.remove(coll);
@@ -2744,7 +2871,7 @@
log.debug("Checking ZK for most up to date Aliases {}", ALIASES);
// Call sync() first to ensure the subsequent read (getData) is up to date.
// MRM TODO: review
- zkClient.getSolrZooKeeper().sync(ALIASES, null, null);
+ zkClient.getConnectionManager().getKeeper().sync(ALIASES, null, null);
Stat stat = new Stat();
final byte[] data = zkClient.getData(ALIASES, null, stat, true);
return setIfNewer(Aliases.fromJSON(data, stat.getVersion()));
@@ -2943,35 +3070,13 @@
for (Slice slice : activeSlices) {
Replica leader = slice.getLeader();
log.trace("slice is {} and leader is {}", slice.getName(), leader);
- if (leader == null) {
+ if (leader == null && expectedReplicas >= expectedShards) {
log.debug("slice={}", slice);
return false;
- } else {
+ } else if (expectedReplicas >= expectedShards) {
if (leader.getState() != Replica.State.ACTIVE) {
return false;
}
-// CoreAdminRequest.WaitForState prepCmd = new CoreAdminRequest.WaitForState();
-// prepCmd.setCoreName(leader.getName());
-// prepCmd.setLeaderName(leader.getName());
-// prepCmd.setCollection(collectionState.getName());
-// prepCmd.setShardId(slice.getName());
-//
-// int readTimeout = Integer.parseInt(System.getProperty("prepRecoveryReadTimeoutExtraWait", "7000"));
-//
-// try (Http2SolrClient client = new Http2SolrClient.Builder(leader.getBaseUrl()).idleTimeout(readTimeout).markInternalRequest().build()) {
-//
-// prepCmd.setBasePath(leader.getBaseUrl());
-//
-// try {
-// NamedList<Object> result = client.request(prepCmd);
-// } catch (SolrServerException | BaseHttpSolrClient.RemoteSolrException e) {
-// log.info("failed checking for leader {} {}", leader.getName(), e.getMessage());
-// return false;
-// } catch (IOException e) {
-// log.info("failed checking for leader {} {}", leader.getName(), e.getMessage());
-// return false;
-// }
-// }
}
if (!justLeaders) {
for (Replica replica : slice) {
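
Several hunks above create a per-collection ReentrantLock with containsKey + putIfAbsent before locking. computeIfAbsent is an equivalent, more compact way to get the same one-lock-per-collection guarantee; the sketch below is illustration only, not the patch's code.

// Sketch: one fair ReentrantLock per collection name, created lazily and exactly once.
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantLock;

final class CollectionLocks {
  private final ConcurrentHashMap<String, ReentrantLock> locks = new ConcurrentHashMap<>();

  void withLock(String collection, Runnable action) {
    ReentrantLock lock = locks.computeIfAbsent(collection, c -> new ReentrantLock(true));
    lock.lock();
    try {
      action.run();
    } finally {
      lock.unlock();
    }
  }
}
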
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/SolrInternalHttpClient.java b/solr/solrj/src/java/org/apache/solr/common/util/SolrInternalHttpClient.java
index a00d893..bc3abcc 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/SolrInternalHttpClient.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/SolrInternalHttpClient.java
@@ -21,8 +21,11 @@
if (log.isDebugEnabled()) {
log.debug("Stopping {}", this.getClass().getSimpleName());
}
- super.doStop();
- assert ObjectReleaseTracker.release(this);
+ try {
+ super.doStop();
+ } finally {
+ assert ObjectReleaseTracker.release(this);
+ }
}
}
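
Same pattern as the SolrQueuedThreadPool change below: the release bookkeeping moves into a finally block so it runs even when the underlying stop throws. A stand-alone sketch with stand-in names (Resource, ReleaseTracker).

// Sketch: always record the release, even if stopping the wrapped resource fails.
final class Resource {
  void close() throws Exception {
    try {
      stopUnderlying(); // may throw
    } finally {
      assert ReleaseTracker.release(this); // bookkeeping runs regardless of failure
    }
  }

  private void stopUnderlying() throws Exception {
    // shut down the wrapped client/pool here
  }

  static final class ReleaseTracker {
    static boolean release(Object o) {
      return true; // a real tracker would remove o from a registry
    }
  }
}
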
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/SolrQueuedThreadPool.java b/solr/solrj/src/java/org/apache/solr/common/util/SolrQueuedThreadPool.java
index 7be6ad6..2007f74 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/SolrQueuedThreadPool.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/SolrQueuedThreadPool.java
@@ -495,7 +495,7 @@
}
private void ensureThreads() {
- while (true) {
+ while (!closed) {
long counts = _counts.get();
int threads = AtomicBiInteger.getHi(counts);
if (threads == Integer.MIN_VALUE) break;
@@ -770,49 +770,52 @@
// }
public void close() {
-
- removeBean(_tryExecutor);
- _tryExecutor = TryExecutor.NO_TRY;
-
try {
- super.doStop();
- } catch (Exception e) {
- LOG.warn("super.doStop", e);
- return;
- }
-
- setMinThreads(0);
- setIdleTimeout(1);
- setStopTimeout(1);
- // Signal the Runner threads that we are stopping
- int threads = _counts.getAndSetHi(Integer.MIN_VALUE);
-
- BlockingQueue<Runnable> jobs = getQueue();
-
-
- for (int i = 0; i < threads; ++i) {
- jobs.offer(NOOP);
- }
-
-
- closed = true;
-
- if (getBusyThreads() > 0) {
+ removeBean(_tryExecutor);
+ _tryExecutor = TryExecutor.NO_TRY;
try {
- joinThreads(TimeUnit.MILLISECONDS.toNanos(250));
- } catch (InterruptedException e) {
- LOG.warn("Interrupted in joinThreads on close {}", e);
- } catch (TimeoutException e) {
- LOG.warn("Timeout in joinThreads on close {}", e);
- } catch (ExecutionException e) {
- LOG.warn("Execution exception in joinThreads on close {}", e);
+ super.doStop();
+ } catch (Exception e) {
+ LOG.warn("super.doStop", e);
+
}
+
+ setMinThreads(0);
+ setIdleTimeout(1);
+ setStopTimeout(1);
+ // Signal the Runner threads that we are stopping
+ int threads = _counts.getAndSetHi(Integer.MIN_VALUE);
+
+ BlockingQueue<Runnable> jobs = getQueue();
+
+ for (int i = 0; i < threads; ++i) {
+ jobs.offer(NOOP);
+ }
+
+ closed = true;
+
+ if (getBusyThreads() > 0) {
+
+ try {
+ joinThreads(TimeUnit.MILLISECONDS.toNanos(250));
+ } catch (InterruptedException e) {
+ LOG.warn("Interrupted in joinThreads on close {}", e);
+ } catch (TimeoutException e) {
+ LOG.warn("Timeout in joinThreads on close {}", e);
+ } catch (ExecutionException e) {
+ LOG.warn("Execution exception in joinThreads on close {}", e);
+ }
+ }
+
+ if (_budget != null) _budget.reset();
+ } catch (RuntimeException e) {
+ log.warn("Exception closing", e);
+ throw e;
+ } finally {
+ assert ObjectReleaseTracker.release(this);
}
- if (_budget != null) _budget.reset();
-
- assert ObjectReleaseTracker.release(this);
}
// @Override
diff --git a/solr/test-framework/src/java/org/apache/solr/SolrTestCase.java b/solr/test-framework/src/java/org/apache/solr/SolrTestCase.java
index eb66667..164dd3c 100644
--- a/solr/test-framework/src/java/org/apache/solr/SolrTestCase.java
+++ b/solr/test-framework/src/java/org/apache/solr/SolrTestCase.java
@@ -325,6 +325,8 @@
System.setProperty("urlScheme", "http");
}
+ System.setProperty("lucene.cms.override_spins", "true"); // TODO: detecting spins for every core, every IW#ConcurrentMergeScheduler can be a bit costly, let's detect and cache somehow?
+
System.setProperty("useCompoundFile", "true");
System.setProperty("solr.tests.maxBufferedDocs", "1000");
@@ -429,7 +431,6 @@
System.setProperty("solr.dependentupdate.timeout", "1500");
// System.setProperty("lucene.cms.override_core_count", "3");
- // System.setProperty("lucene.cms.override_spins", "false");
// unlimited - System.setProperty("solr.maxContainerThreads", "300");
System.setProperty("solr.lowContainerThreadsThreshold", "-1");
@@ -821,7 +822,7 @@
return testExecutor;
}
testExecutor = (ParWorkExecutor) ParWork.getParExecutorService(
- "testExecutor", 5, 30, 500, new BlockingArrayQueue(12, 16));
+ "testExecutor", 5, 64, 500, new BlockingArrayQueue(12, 16));
testExecutor.prestartAllCoreThreads();
((ParWorkExecutor) testExecutor).enableCloseLock();
return testExecutor;
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java
index 232cb90..a92d394 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractDistribZkTestBase.java
@@ -109,7 +109,7 @@
// // give everyone there own solrhome
// File jettyHome = new File(new File(getSolrHome()).getParentFile(), "jetty" + homeCount.incrementAndGet());
// setupJettySolrHome(jettyHome);
-// JettySolrRunner j = createJetty(jettyHome, null, "shard" + (i + 2));
+// JettySolrRunner j = createJetty(jettyHome, null, "s" + (i + 2));
// j.start();
// jettys.add(j);
// clients.add(createNewSolrClient(j.getLocalPort()));
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
index 03b3a5f..b4eb3a5 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
@@ -152,8 +152,8 @@
}
}
- public static final String SHARD1 = "shard1";
- public static final String SHARD2 = "shard2";
+ public static final String SHARD1 = "s1";
+ public static final String SHARD2 = "s2";
protected boolean printLayoutOnTearDown = false;
@@ -476,7 +476,7 @@
j.start();
jettys.add(j);
createReplicaRequests.add(CollectionAdminRequest
- .addReplicaToShard(DEFAULT_COLLECTION, "shard" + ((currentI % sliceCount) + 1))
+ .addReplicaToShard(DEFAULT_COLLECTION, "s" + ((currentI % sliceCount) + 1))
.setNode(j.getNodeName())
.setType(Replica.Type.TLOG));
waitForLiveNode(j);
@@ -506,7 +506,7 @@
j.start();
jettys.add(j);
createReplicaRequests.add(CollectionAdminRequest
- .addReplicaToShard(DEFAULT_COLLECTION, "shard" + ((currentI % sliceCount) + 1))
+ .addReplicaToShard(DEFAULT_COLLECTION, "s" + ((currentI % sliceCount) + 1))
.setNode(j.getNodeName())
.setType(Replica.Type.NRT));
waitForLiveNode(j);
@@ -532,7 +532,7 @@
j.start();
jettys.add(j);
createPullReplicaRequests.add(CollectionAdminRequest
- .addReplicaToShard(DEFAULT_COLLECTION, "shard" + ((currentI % sliceCount) + 1))
+ .addReplicaToShard(DEFAULT_COLLECTION, "s" + ((currentI % sliceCount) + 1))
.setNode(j.getNodeName())
.setType(Replica.Type.PULL));
waitForLiveNode(j);
@@ -596,13 +596,17 @@
// MiniSolrCloudCluster.expectedShardsAndActiveReplicas(sliceCount, addReplicas.get()));
waitForActiveReplicaCount(cloudClient, DEFAULT_COLLECTION, addReplicas.get());
+
this.jettys.addAll(jettys);
this.clients.addAll(clients);
ZkStateReader zkStateReader = cloudClient.getZkStateReader();
+
+ zkStateReader.waitForActiveCollection(cloudClient.getHttpClient(), DEFAULT_COLLECTION, 10, TimeUnit.SECONDS, false, sliceCount, addReplicas.get(), true, true);
+
// make sure we have a leader for each shard
for (int i = 1; i <= sliceCount; i++) {
- zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + i, 10000);
+ zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "s" + i, 10000);
}
if (sliceCount > 0) {
@@ -760,6 +764,7 @@
.withServlets(getExtraServlets())
.withFilters(getExtraRequestFilters())
.withSSLConfig(sslConfig.buildServerSSLConfig())
+ .enableProxy(true)
.build();
Properties props = new Properties();
@@ -778,7 +783,7 @@
}
props.setProperty("coreRootDirectory", solrHome.toPath().resolve("cores").toAbsolutePath().toString());
- JettySolrRunner jetty = new JettySolrRunner(solrHome.getPath(), props, jettyconfig, true);
+ JettySolrRunner jetty = new JettySolrRunner(solrHome.getPath(), props, jettyconfig);
return jetty;
}
@@ -961,7 +966,7 @@
StringBuilder sb = new StringBuilder();
for (int i = 0; i < sliceCount; i++) {
if (i > 0) sb.append(',');
- sb.append("shard").append(i + 1);
+ sb.append("s").append(i + 1);
}
params.set("shards", sb.toString());
}
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java b/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java
index 165e438..1e952f7 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/ChaosMonkey.java
@@ -38,6 +38,7 @@
import org.apache.solr.common.cloud.Replica.Type;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.common.cloud.SolrZooKeeper;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.ExecutorUtil;
@@ -170,7 +171,7 @@
if (cores != null) {
monkeyLog("Will cause connection loss on " + jetty.getLocalPort());
SolrZkClient zkClient = cores.getZkController().getZkClient();
- zkClient.getSolrZooKeeper().closeCnxn();
+ ((SolrZooKeeper)zkClient.getConnectionManager().getKeeper()).closeCnxn();
}
}
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/ClusterChaosMonkey.java b/solr/test-framework/src/java/org/apache/solr/cloud/ClusterChaosMonkey.java
index 9424662..23ae145 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/ClusterChaosMonkey.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/ClusterChaosMonkey.java
@@ -24,7 +24,7 @@
import org.apache.solr.common.cloud.Replica.Type;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
-import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.SolrZooKeeper;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.SolrNamedThreadFactory;
@@ -164,7 +164,7 @@
if (cores != null) {
monkeyLog("Will cause connection loss on " + jetty.getLocalPort());
SolrZkClient zkClient = cores.getZkController().getZkClient();
- zkClient.getSolrZooKeeper().closeCnxn();
+ ((SolrZooKeeper)zkClient.getConnectionManager().getKeeper()).closeCnxn();
}
}
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
index 53b09bb..1aa093c 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
@@ -47,7 +47,9 @@
import org.apache.solr.client.solrj.embedded.JettyConfig;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.embedded.SSLConfig;
+import org.apache.solr.client.solrj.impl.BaseHttpSolrClient;
import org.apache.solr.client.solrj.impl.CloudHttp2SolrClient;
+import org.apache.solr.client.solrj.impl.Http2SolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.ConfigSetAdminRequest;
import org.apache.solr.common.ParWork;
@@ -58,6 +60,7 @@
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.common.cloud.SolrZooKeeper;
import org.apache.solr.common.cloud.ZkConfigManager;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.IOUtils;
@@ -135,7 +138,7 @@
private final boolean externalZkServer;
private final List<JettySolrRunner> jettys = new CopyOnWriteArrayList<>();
private final Path baseDir;
- private CloudHttp2SolrClient solrClient;
+ private volatile CloudHttp2SolrClient solrClient;
private final JettyConfig jettyConfig;
private final boolean trackJettyMetrics;
@@ -328,7 +331,9 @@
// build the client
solrClient = buildZkReaderAndSolrClient();
+ solrClient.enableCloseLock();
solrZkClient = zkStateReader.getZkClient();
+ solrZkClient.enableCloseLock();
} catch (Throwable t) {
shutdown();
@@ -612,17 +617,24 @@
final Set<String> collections = reader.getClusterState().getCollectionsMap().keySet();
try (ParWork work = new ParWork(this, false, false)) {
collections.forEach(collection -> {
- work.collect("", ()->{
- try {
- CollectionAdminRequest.deleteCollection(collection).process(solrClient);
- } catch (Exception e) {
- throw new SolrException(ErrorCode.SERVER_ERROR, e);
- }
- });
+ work.collect("", () -> {
+ try {
+ CollectionAdminRequest.deleteCollection(collection).process(solrClient);
+ } catch (SolrException e) {
+ if (e.code() == 400) {
+ log.warn("400 on collection delete (likely already gone)", e);
+
+ } else {
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
+ }
+ } catch (Exception e) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
+ }
+ });
});
}
}
-
+
public void deleteAllConfigSets() throws SolrServerException, IOException {
List<String> configSetNames = new ConfigSetAdminRequest.List().process(solrClient).getConfigSets();
@@ -648,14 +660,22 @@
}
jettys.clear();
- try (ParWork parWork = new ParWork(this, false, false)) {
+ try (ParWork parWork = new ParWork(this, false, true)) {
parWork.collect(shutdowns);
}
+ if (solrClient != null) {
+ solrClient.disableCloseLock();
+ }
+ if (solrZkClient != null) {
+ solrZkClient.disableCloseLock();
+ }
IOUtils.closeQuietly(solrClient);
IOUtils.closeQuietly(zkStateReader);
+ IOUtils.closeQuietly(solrZkClient);
+
if (!externalZkServer) {
IOUtils.closeQuietly(zkServer);
}
@@ -663,6 +683,8 @@
} finally {
System.clearProperty("zkHost");
solrClient = null;
+ solrZkClient = null;
+ zkStateReader = null;
assert ObjectReleaseTracker.release(this);
}
@@ -762,7 +784,7 @@
CoreContainer cores = jetty.getCoreContainer();
if (cores != null) {
SolrZkClient zkClient = cores.getZkController().getZkClient();
- zkClient.getSolrZooKeeper().closeCnxn();
+ ((SolrZooKeeper)zkClient.getConnectionManager().getKeeper()).closeCnxn();
long sessionId = zkClient.getSessionId();
zkServer.expire(sessionId);
if (log.isInfoEnabled()) {
@@ -848,21 +870,28 @@
throw new SolrException(ErrorCode.NOT_FOUND, "No open Overseer found");
}
- public void waitForActiveCollection(String collection, long wait, TimeUnit unit, int shards, int totalReplicas) {
+ public void waitForActiveCollection(String collection, long wait, TimeUnit unit, int shards, int totalReplicas) throws TimeoutException {
waitForActiveCollection(collection, wait, unit, shards, totalReplicas, false);
}
- public void waitForActiveCollection(String collection, long wait, TimeUnit unit, int shards, int totalReplicas, boolean exact) {
+ public void waitForActiveCollection(String collection, long wait, TimeUnit unit, int shards, int totalReplicas, boolean exact) throws TimeoutException {
zkStateReader.waitForActiveCollection(collection, wait, unit, shards, totalReplicas, exact);
}
- public void waitForActiveCollection(String collection, int shards, int totalReplicas) {
+ public void waitForActiveCollection(String collection, long wait, TimeUnit unit, boolean justLeaders, int shards, int totalReplicas, boolean exact, boolean verifyLeaders) throws TimeoutException {
+ zkStateReader.waitForActiveCollection(collection, wait, unit, justLeaders, shards, totalReplicas, exact, verifyLeaders);
+ }
+
+ public void waitForActiveCollection(Http2SolrClient client, String collection, long wait, TimeUnit unit, boolean justLeaders, int shards, int totalReplicas, boolean exact, boolean verifyLeaders) throws TimeoutException {
+ zkStateReader.waitForActiveCollection(client, collection, wait, unit, justLeaders, shards, totalReplicas, exact, verifyLeaders);
+ }
+
+ public void waitForActiveCollection(String collection, int shards, int totalReplicas) throws TimeoutException {
if (collection == null) throw new IllegalArgumentException("null collection");
waitForActiveCollection(collection, 60, TimeUnit.SECONDS, shards, totalReplicas);
}
- public void waitForActiveCollection(String collection, int shards, int totalReplicas, boolean exact) {
-
+ public void waitForActiveCollection(String collection, int shards, int totalReplicas, boolean exact) throws TimeoutException {
waitForActiveCollection(collection, 60, TimeUnit.SECONDS, shards, totalReplicas, exact);
}
diff --git a/solr/test-framework/src/resources/logconf/log4j2-startup-debug.xml b/solr/test-framework/src/resources/logconf/log4j2-startup-debug.xml
index 6f81795..55547c9 100644
--- a/solr/test-framework/src/resources/logconf/log4j2-startup-debug.xml
+++ b/solr/test-framework/src/resources/logconf/log4j2-startup-debug.xml
@@ -72,6 +72,7 @@
<AsyncLogger name="org.apache.solr.cloud.StatePublisher" level="DEBUG"/>
<AsyncLogger name="org.apache.solr.core.SolrCore" level="DEBUG"/>
+ <AsyncLogger name="org.apache.solr.core.CachingDirectoryFactory" level="DEBUG"/>
<AsyncLogger name="org.apache.solr.core.CoreContainer" level="DEBUG"/>
<AsyncLogger name="org.apache.solr.common.cloud.ZkMaintenanceUtils" level="DEBUG"/>
<AsyncLogger name="org.apache.solr.update.processor.DistributedZkUpdateProcessor" level="DEBUG"/>