SOLR-14892: shards.info with shards.tolerant can yield an empty key (#286)

Improvement / work-around for a bug.

Co-authored-by: Mathieu Marie <mmarie@salesforce.com>
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 76330b4..0cd8f12 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -145,6 +145,9 @@
 * SOLR-17198: AffinityPlacementFactory can fail if Shard leadership changes occur while it is collecting metrics.
   (Paul McArthur)
 
+* SOLR-14892: Queries with shards.info and shards.tolerant can yield multiple null keys in place of shard names
+  (Mathieu Marie, David Smiley)
+
 Dependency Upgrades
 ---------------------
 
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index d30d62e..8b34422 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -946,6 +946,7 @@
     Float maxScore = null;
     boolean thereArePartialResults = false;
     Boolean segmentTerminatedEarly = null;
+    int failedShardCount = 0;
     for (ShardResponse srsp : sreq.responses) {
       SolrDocumentList docs = null;
       NamedList<?> responseHeader = null;
@@ -956,7 +957,7 @@
         if (srsp.getException() != null) {
           Throwable t = srsp.getException();
           if (t instanceof SolrServerException) {
-            t = ((SolrServerException) t).getCause();
+            t = t.getCause();
           }
           nl.add("error", t.toString());
           if (!rb.req.getCore().getCoreContainer().hideStackTrace()) {
@@ -964,7 +965,7 @@
             t.printStackTrace(new PrintWriter(trace));
             nl.add("trace", trace.toString());
           }
-          if (srsp.getShardAddress() != null) {
+          if (!StrUtils.isNullOrEmpty(srsp.getShardAddress())) {
             nl.add("shardAddress", srsp.getShardAddress());
           }
         } else {
@@ -994,8 +995,13 @@
         if (srsp.getSolrResponse() != null) {
           nl.add("time", srsp.getSolrResponse().getElapsedTime());
         }
-
-        shardInfo.add(srsp.getShard(), nl);
+        // This ought to be better, but at least this ensures no duplicate keys in JSON result
+        String shard = srsp.getShard();
+        if (StrUtils.isNullOrEmpty(shard)) {
+          failedShardCount += 1;
+          shard = "unknown_shard_" + failedShardCount;
+        }
+        shardInfo.add(shard, nl);
       }
       // now that we've added the shard info, let's only proceed if we have no error.
       if (srsp.getException() != null) {
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestShardsInfoResponse.java b/solr/core/src/test/org/apache/solr/cloud/TestShardsInfoResponse.java
new file mode 100644
index 0000000..b318d23
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/TestShardsInfoResponse.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud;
+
+import static org.hamcrest.core.IsIterableContaining.hasItems;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.params.ShardParams;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.hamcrest.MatcherAssert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Test which asserts that shards.info=true works even if several shards are down It must return one
+ * unique key per shard. See SOLR-14892
+ */
+public class TestShardsInfoResponse extends SolrCloudTestCase {
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(3).addConfig("cloud-minimal", configset("cloud-minimal")).configure();
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void searchingWithShardsInfoMustNotReturnEmptyOrDuplicateKeys() throws Exception {
+
+    CollectionAdminRequest.createCollection("collection", "cloud-minimal", 3, 1)
+        .process(cluster.getSolrClient());
+
+    UpdateRequest update = new UpdateRequest();
+    for (int i = 0; i < 100; i++) {
+      update.add("id", Integer.toString(i));
+    }
+    update.commit(cluster.getSolrClient(), "collection");
+
+    // Stops 2 shards
+    for (int i = 0; i < 2; i++) {
+      cluster.waitForJettyToStop(cluster.stopJettySolrRunner(i));
+    }
+
+    QueryResponse response =
+        cluster
+            .getSolrClient()
+            .query(
+                "collection",
+                new SolrQuery("*:*")
+                    .setRows(1)
+                    .setParam(ShardParams.SHARDS_TOLERANT, true)
+                    .setParam(ShardParams.SHARDS_INFO, true));
+    assertEquals(0, response.getStatus());
+    assertTrue(response.getResults().getNumFound() > 0);
+
+    SimpleOrderedMap<Object> shardsInfo =
+        (SimpleOrderedMap<Object>) response.getResponse().get("shards.info");
+    // We verify that there are no duplicate keys in case of 2 shards in error
+    assertEquals(3, shardsInfo.size());
+
+    Collection<String> keys = new ArrayList<>();
+    keys.add(shardsInfo.getName(0));
+    keys.add(shardsInfo.getName(1));
+    keys.add(shardsInfo.getName(2));
+
+    // The names of the shards in error are generated as unknown_shard_1 and unknown_shard_2 because
+    // we could not get the real shard names.
+    MatcherAssert.assertThat(
+        (Iterable<String>) keys, hasItems("unknown_shard_1", "unknown_shard_2"));
+  }
+}