Change default podManagementPolicy and fix issues with managed updates
Also expanding the documentation for the change to the ordering of live
nodes, as well as the change for non-started, out-of-date pods (which
are updated automatically).
Signed-off-by: Houston Putman <houston@apache.org>
diff --git a/README.md b/README.md
index 5ea991c..67fcc6b 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,11 @@
**This means that Kubernetes support is now limited to 1.16+.**
If you are unable to use a newer version of Kubernetes, please install the `v0.2.6` version of the Solr Operator for use with Kubernetes 1.15 and below.
+
+- The default `PodManagementPolicy` for StatefulSets has been changed to `Parallel` from `OrderedReady`.
+This change will not affect existing StatefulSets, as `PodManagementPolicy` cannot be updated.
+In order to continue using `OrderedReady` on new SolrClouds, please set the following option to `OrderedReady`:
+`SolrCloud.spec.customSolrKubeOptions.statefulSetOptions.podManagementPolicy`
- The `SolrCloud` and `SolrPrometheusExporter` services' portNames have changed to `"solr-client"` and `"solr-metrics"` from `"ext-solr-client"` and `"ext-solr-metrics"`, respectively.
This is due to a bug in Kubernetes where `portName` and `targetPort` must match for services.
diff --git a/api/v1beta1/common_types.go b/api/v1beta1/common_types.go
index bf47c0b..852df75 100644
--- a/api/v1beta1/common_types.go
+++ b/api/v1beta1/common_types.go
@@ -34,7 +34,7 @@
Labels map[string]string `json:"labels,omitempty"`
// PodManagementPolicy defines the policy for creating pods under a stateful set.
- // Override the default value of OrderedReady.
+ // Override the default value of Parallel.
// This cannot be updated on an existing StatefulSet, the StatefulSet must be deleted and recreated for a change in this field to take effect.
//
// +optional
diff --git a/config/crd/bases/solr.bloomberg.com_solrclouds.yaml b/config/crd/bases/solr.bloomberg.com_solrclouds.yaml
index cd33258..eb34dd4 100644
--- a/config/crd/bases/solr.bloomberg.com_solrclouds.yaml
+++ b/config/crd/bases/solr.bloomberg.com_solrclouds.yaml
@@ -4277,7 +4277,7 @@
description: Labels to be added for the StatefulSet.
type: object
podManagementPolicy:
- description: PodManagementPolicy defines the policy for creating pods under a stateful set. Override the default value of OrderedReady. This cannot be updated on an existing StatefulSet, the StatefulSet must be deleted and recreated for a change in this field to take effect.
+ description: PodManagementPolicy defines the policy for creating pods under a stateful set. Override the default value of Parallel. This cannot be updated on an existing StatefulSet, the StatefulSet must be deleted and recreated for a change in this field to take effect.
enum:
- OrderedReady
- Parallel
diff --git a/controllers/solrcloud_controller.go b/controllers/solrcloud_controller.go
index d4c23cf..800e459 100644
--- a/controllers/solrcloud_controller.go
+++ b/controllers/solrcloud_controller.go
@@ -302,20 +302,28 @@
}
}
- var outOfDatePods []corev1.Pod
+ var outOfDatePods, outOfDatePodsNotStarted []corev1.Pod
var availableUpdatedPodCount int
- outOfDatePods, availableUpdatedPodCount, err = reconcileCloudStatus(r, instance, &newStatus, statefulSetStatus)
+ outOfDatePods, outOfDatePodsNotStarted, availableUpdatedPodCount, err = reconcileCloudStatus(r, instance, &newStatus, statefulSetStatus)
if err != nil {
return requeueOrNot, err
}
// Manage the updating of out-of-spec pods, if the Managed UpdateStrategy has been specified.
- totalPodCount := int(newStatus.Replicas)
- if instance.Spec.UpdateStrategy.Method == solr.ManagedUpdate && len(outOfDatePods) > 0 && totalPodCount > 0 {
+ totalPodCount := int(*instance.Spec.Replicas)
+ if instance.Spec.UpdateStrategy.Method == solr.ManagedUpdate && len(outOfDatePods)+len(outOfDatePodsNotStarted) > 0 {
+ updateLogger := logger.WithName("ManagedUpdateSelector")
+
+ // The out of date pods that have not been started, should all be updated immediately.
+ // There is no use "safely" updating pods which have not been started yet.
+ podsToUpdate := outOfDatePodsNotStarted
+ for _, pod := range outOfDatePodsNotStarted {
+ logger.Info("Pod killed for update.", "pod", pod.Name, "reason", "The solr container in the pod has not yet started, thus it is safe to update.")
+ }
// Pick which pods should be deleted for an update.
// Don't exit on an error, which would only occur because of an HTTP Exception. Requeue later instead.
- updateLogger := logger.WithName("ManagedUpdateSelector")
- podsToUpdate, retryLater := util.DeterminePodsSafeToUpdate(instance, outOfDatePods, totalPodCount, int(newStatus.ReadyReplicas), availableUpdatedPodCount, updateLogger)
+ additionalPodsToUpdate, retryLater := util.DeterminePodsSafeToUpdate(instance, outOfDatePods, totalPodCount, int(newStatus.ReadyReplicas), availableUpdatedPodCount, len(outOfDatePodsNotStarted), updateLogger)
+ podsToUpdate = append(podsToUpdate, additionalPodsToUpdate...)
for _, pod := range podsToUpdate {
err = r.Delete(context.Background(), &pod, client.Preconditions{
@@ -370,7 +378,7 @@
return requeueOrNot, nil
}
-func reconcileCloudStatus(r *SolrCloudReconciler, solrCloud *solr.SolrCloud, newStatus *solr.SolrCloudStatus, statefulSetStatus appsv1.StatefulSetStatus) (outOfDatePods []corev1.Pod, availableUpdatedPodCount int, err error) {
+func reconcileCloudStatus(r *SolrCloudReconciler, solrCloud *solr.SolrCloud, newStatus *solr.SolrCloudStatus, statefulSetStatus appsv1.StatefulSetStatus) (outOfDatePods []corev1.Pod, outOfDatePodsNotStarted []corev1.Pod, availableUpdatedPodCount int, err error) {
foundPods := &corev1.PodList{}
selectorLabels := solrCloud.SharedLabels()
selectorLabels["technology"] = solr.SolrTechnologyLabel
@@ -383,7 +391,7 @@
err = r.List(context.TODO(), foundPods, listOps)
if err != nil {
- return outOfDatePods, availableUpdatedPodCount, err
+ return outOfDatePods, outOfDatePodsNotStarted, availableUpdatedPodCount, err
}
var otherVersions []string
@@ -442,7 +450,23 @@
availableUpdatedPodCount += 1
}
} else {
- outOfDatePods = append(outOfDatePods, p)
+ containerNotStarted := false
+ if !nodeStatus.Ready {
+ containerNotStarted = true
+ // Gather whether the solr container has started or not.
+ // If it hasn't, then the pod can safely be deleted irrespective of maxNodesUnavailable.
+ // This is useful for podTemplate updates that override pod specs that failed to start, such as containers with images that do not exist.
+ for _, containerStatus := range p.Status.ContainerStatuses {
+ if containerStatus.Name == util.SolrNodeContainer {
+ containerNotStarted = containerStatus.Started == nil || !*containerStatus.Started
+ }
+ }
+ }
+ if containerNotStarted {
+ outOfDatePodsNotStarted = append(outOfDatePodsNotStarted, p)
+ } else {
+ outOfDatePods = append(outOfDatePods, p)
+ }
}
nodeStatusMap[nodeStatus.Name] = nodeStatus
@@ -473,7 +497,7 @@
newStatus.ExternalCommonAddress = &extAddress
}
- return outOfDatePods, availableUpdatedPodCount, nil
+ return outOfDatePods, outOfDatePodsNotStarted, availableUpdatedPodCount, nil
}
func reconcileNodeService(r *SolrCloudReconciler, logger logr.Logger, instance *solr.SolrCloud, nodeName string) (err error, ip string) {
diff --git a/controllers/solrcloud_controller_test.go b/controllers/solrcloud_controller_test.go
index c786a4e..eb49d01 100644
--- a/controllers/solrcloud_controller_test.go
+++ b/controllers/solrcloud_controller_test.go
@@ -119,7 +119,7 @@
// Check the update strategy
assert.EqualValues(t, appsv1.OnDeleteStatefulSetStrategyType, statefulSet.Spec.UpdateStrategy.Type, "Incorrect statefulset update strategy")
- assert.EqualValues(t, appsv1.OrderedReadyPodManagement, statefulSet.Spec.PodManagementPolicy, "Incorrect statefulset pod management policy")
+ assert.EqualValues(t, appsv1.ParallelPodManagement, statefulSet.Spec.PodManagementPolicy, "Incorrect statefulset pod management policy")
// Host Alias Tests
assert.Nil(t, statefulSet.Spec.Template.Spec.HostAliases, "There is no need for host aliases because traffic is going directly to pods.")
@@ -200,7 +200,7 @@
StatefulSetOptions: &solr.StatefulSetOptions{
Annotations: testSSAnnotations,
Labels: testSSLabels,
- PodManagementPolicy: appsv1.ParallelPodManagement,
+ PodManagementPolicy: appsv1.OrderedReadyPodManagement,
},
CommonServiceOptions: &solr.ServiceOptions{
Annotations: testCommonServiceAnnotations,
@@ -286,7 +286,7 @@
// Check the update strategy
assert.EqualValues(t, appsv1.RollingUpdateStatefulSetStrategyType, statefulSet.Spec.UpdateStrategy.Type, "Incorrect statefulset update strategy")
- assert.EqualValues(t, appsv1.ParallelPodManagement, statefulSet.Spec.PodManagementPolicy, "Incorrect statefulset pod management policy")
+ assert.EqualValues(t, appsv1.OrderedReadyPodManagement, statefulSet.Spec.PodManagementPolicy, "Incorrect statefulset pod management policy")
// Check the client Service
service := expectService(t, g, requests, expectedCloudRequest, cloudCsKey, statefulSet.Spec.Selector.MatchLabels)
@@ -399,7 +399,7 @@
// Check the update strategy
assert.EqualValues(t, appsv1.OnDeleteStatefulSetStrategyType, statefulSet.Spec.UpdateStrategy.Type, "Incorrect statefulset update strategy")
- assert.EqualValues(t, appsv1.OrderedReadyPodManagement, statefulSet.Spec.PodManagementPolicy, "Incorrect statefulset pod management policy")
+ assert.EqualValues(t, appsv1.ParallelPodManagement, statefulSet.Spec.PodManagementPolicy, "Incorrect statefulset pod management policy")
}
func TestCloudWithExternalZookeeperChroot(t *testing.T) {
diff --git a/controllers/util/common.go b/controllers/util/common.go
index da0681d..b536da6 100644
--- a/controllers/util/common.go
+++ b/controllers/util/common.go
@@ -189,7 +189,7 @@
to.Spec.Rules = from.Spec.Rules
} else {
for i := range from.Spec.Rules {
- ruleBase := "Spec.Rules["+strconv.Itoa(i)+"]."
+ ruleBase := "Spec.Rules[" + strconv.Itoa(i) + "]."
fromRule := &from.Spec.Rules[i]
toRule := &to.Spec.Rules[i]
@@ -209,7 +209,7 @@
toRule.HTTP.Paths = fromRule.HTTP.Paths
} else {
for j := range fromRule.HTTP.Paths {
- pathBase := ruleBase+"HTTP.Paths["+strconv.Itoa(j)+"]."
+ pathBase := ruleBase + "HTTP.Paths[" + strconv.Itoa(j) + "]."
fromPath := &fromRule.HTTP.Paths[j]
toPath := &toRule.HTTP.Paths[j]
@@ -427,7 +427,7 @@
*toPtr = from
} else {
for i := 0; i < len(from); i++ {
- containerBasePath := basePath+"["+strconv.Itoa(i)+"]."
+ containerBasePath := basePath + "[" + strconv.Itoa(i) + "]."
if !DeepEqualWithNils(to[i].Name, from[i].Name) {
requireUpdate = true
logger.Info("Update required because field changed", "field", containerBasePath+"Name", "from", to[i].Name, "to", from[i].Name)
diff --git a/controllers/util/solr_update_util.go b/controllers/util/solr_update_util.go
index 0d7ce27..a3ac8c5 100644
--- a/controllers/util/solr_update_util.go
+++ b/controllers/util/solr_update_util.go
@@ -34,14 +34,19 @@
// DeterminePodsSafeToUpdate takes a list of solr Pods and returns a list of pods that are safe to upgrade now.
// This function MUST be idempotent and return the same list of pods given the same kubernetes/solr state.
+//
+// NOTE: It is assumed that all of the pods in the provided list have been started.
+// If an out-of-date pod has a solr container that has not started, it should be counted in outOfDatePodsNotStartedCount, not included in outOfDatePods.
+//
// TODO:
// - Think about caching this for ~250 ms? Not a huge need to send these requests milliseconds apart.
// - Might be too much complexity for very little gain.
-func DeterminePodsSafeToUpdate(cloud *solr.SolrCloud, outOfDatePods []corev1.Pod, totalPods int, readyPods int, availableUpdatedPodCount int, logger logr.Logger) (podsToUpdate []corev1.Pod, retryLater bool) {
+func DeterminePodsSafeToUpdate(cloud *solr.SolrCloud, outOfDatePods []corev1.Pod, totalPods int, readyPods int, availableUpdatedPodCount int, outOfDatePodsNotStartedCount int, logger logr.Logger) (podsToUpdate []corev1.Pod, retryLater bool) {
// Before fetching the cluster state, be sure that there is room to update at least 1 pod
- maxPodsUnavailable, unavailableUpdatedPodCount, maxPodsToUpdate := calculateMaxPodsToUpdate(cloud, totalPods, len(outOfDatePods), availableUpdatedPodCount)
+ maxPodsUnavailable, unavailableUpdatedPodCount, maxPodsToUpdate := calculateMaxPodsToUpdate(cloud, totalPods, len(outOfDatePods), outOfDatePodsNotStartedCount, availableUpdatedPodCount)
if maxPodsToUpdate <= 0 {
- logger.Info("Pod update selection canceled. The number of updated pods unavailable equals or exceeds the calculated maxPodsUnavailable.", "unavailableUpdatedPods", unavailableUpdatedPodCount, "maxPodsUnavailable", maxPodsUnavailable)
+ logger.Info("Pod update selection canceled. The number of updated pods unavailable equals or exceeds the calculated maxPodsUnavailable.",
+ "unavailableUpdatedPods", unavailableUpdatedPodCount, "outOfDatePodsNotStarted", outOfDatePodsNotStartedCount, "maxPodsUnavailable", maxPodsUnavailable)
} else {
clusterResp := &solr_api.SolrClusterStatusResponse{}
overseerResp := &solr_api.SolrOverseerStatusResponse{}
@@ -68,7 +73,7 @@
}
// If the update logic already wants to retry later, then do not pick any pods
if !retryLater {
- logger.Info("Pod update selection started.", "outOfDatePods", len(outOfDatePods), "maxPodsUnavailable", maxPodsUnavailable, "unavailableUpdatedPods", unavailableUpdatedPodCount, "maxPodsToUpdate", maxPodsToUpdate)
+ logger.Info("Pod update selection started.", "outOfDatePods", len(outOfDatePods), "maxPodsUnavailable", maxPodsUnavailable, "unavailableUpdatedPods", unavailableUpdatedPodCount, "outOfDatePodsNotStarted", outOfDatePodsNotStartedCount, "maxPodsToUpdate", maxPodsToUpdate)
podsToUpdate = pickPodsToUpdate(cloud, outOfDatePods, clusterResp.ClusterStatus, overseerResp.Leader, totalPods, maxPodsToUpdate, logger)
// If there are no pods to upgrade, even though the maxPodsToUpdate is >0, then retry later because the issue stems from cluster state
@@ -82,12 +87,16 @@
}
// calculateMaxPodsToUpdate determines the maximum number of additional pods that can be updated.
-func calculateMaxPodsToUpdate(cloud *solr.SolrCloud, totalPods int, outOfDatePodCount int, availableUpdatedPodCount int) (maxPodsUnavailable int, unavailableUpdatedPodCount int, maxPodsToUpdate int) {
+func calculateMaxPodsToUpdate(cloud *solr.SolrCloud, totalPods int, outOfDatePodCount int, outOfDatePodsNotStartedCount int, availableUpdatedPodCount int) (maxPodsUnavailable int, unavailableUpdatedPodCount int, maxPodsToUpdate int) {
// In order to calculate the number of updated pods that are unavailable take all pods, take the total pods and subtract those that are available and updated, and those that are not updated.
- unavailableUpdatedPodCount = totalPods - availableUpdatedPodCount - outOfDatePodCount
+ unavailableUpdatedPodCount = totalPods - availableUpdatedPodCount - outOfDatePodCount - outOfDatePodsNotStartedCount
// If the maxBatchNodeUpgradeSpec is passed as a decimal between 0 and 1, then calculate as a percentage of the number of nodes.
maxPodsUnavailable, _ = ResolveMaxPodsUnavailable(cloud.Spec.UpdateStrategy.ManagedUpdateOptions.MaxPodsUnavailable, totalPods)
- return maxPodsUnavailable, unavailableUpdatedPodCount, maxPodsUnavailable - unavailableUpdatedPodCount
+ // Subtract:
+ // - unavailableUpdatedPodCount, because those pods are already unavailable
+ // - outOfDatePodsNotStartedCount, because those pods will always be deleted first and affect the number of unavailable pods
+ maxPodsToUpdate = maxPodsUnavailable - unavailableUpdatedPodCount - outOfDatePodsNotStartedCount
+ return maxPodsUnavailable, unavailableUpdatedPodCount, maxPodsToUpdate
}
func pickPodsToUpdate(cloud *solr.SolrCloud, outOfDatePods []corev1.Pod, clusterStatus solr_api.SolrClusterStatus,
@@ -186,8 +195,6 @@
return podsToUpdate
}
-// TODO: Check to see if any of the podsToUpgrade are down. If so go ahead and update them? (Need to think more on this). This may be taken care of by the check of "live".
-// TODO: Think on where the liveness check should be in the ordering.
func sortNodePodsBySafety(outOfDatePods []corev1.Pod, nodeMap map[string]*SolrNodeContents, solrCloud *solr.SolrCloud) {
sort.SliceStable(outOfDatePods, func(i, j int) bool {
// First sort by if the node is in the ClusterState
@@ -204,6 +211,11 @@
return true
}
+ // Prioritize a node that is not live over one that is, before comparing the number of leaders or replicas.
+ if nodeI.live != nodeJ.live {
+ return !nodeI.live
+ }
+
// If both nodes are in the ClusterState and not overseerLeader, then prioritize the one with less leaders.
if nodeI.leaders != nodeJ.leaders {
return nodeI.leaders < nodeJ.leaders
@@ -219,11 +231,6 @@
return nodeI.replicas < nodeJ.replicas
}
- // If the nodes have the same number of replicas, then prioritize if one node is not live.
- if nodeI.live != nodeJ.live {
- return !nodeI.live
- }
-
// Lastly break any ties by a comparison of the name
return nodeI.nodeName > nodeJ.nodeName
})
diff --git a/controllers/util/solr_update_util_test.go b/controllers/util/solr_update_util_test.go
index a084bd4..ad9fd92 100644
--- a/controllers/util/solr_update_util_test.go
+++ b/controllers/util/solr_update_util_test.go
@@ -23,6 +23,7 @@
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
+ "strconv"
"testing"
)
@@ -98,7 +99,7 @@
// Test the maxBatchNodeUpgradeSpec
maxshardReplicasUnavailable = intstr.FromInt(1)
podsToUpgrade = getPodNames(pickPodsToUpdate(solrCloud, allPods, testRecoveringClusterStatus, overseerLeader, 6, 1, log))
- assert.ElementsMatch(t, []string{"pod-6"}, podsToUpgrade, "Incorrect set of next pods to upgrade. Only 1 node should be upgraded when maxBatchNodeUpgradeSpec=1")
+ assert.ElementsMatch(t, []string{"pod-4"}, podsToUpgrade, "Incorrect set of next pods to upgrade. Only 1 node should be upgraded when maxBatchNodeUpgradeSpec=1, and it should be the non-live node.")
// Test the maxShardReplicasDownSpec
maxshardReplicasUnavailable = intstr.FromInt(2)
@@ -163,17 +164,9 @@
},
}
- pods := []corev1.Pod{
- {ObjectMeta: metav1.ObjectMeta{Name: "pod-0"}, Spec: corev1.PodSpec{}},
- {ObjectMeta: metav1.ObjectMeta{Name: "pod-1"}, Spec: corev1.PodSpec{}},
- {ObjectMeta: metav1.ObjectMeta{Name: "pod-2"}, Spec: corev1.PodSpec{}},
- {ObjectMeta: metav1.ObjectMeta{Name: "pod-3"}, Spec: corev1.PodSpec{}},
- {ObjectMeta: metav1.ObjectMeta{Name: "pod-4"}, Spec: corev1.PodSpec{}},
- {ObjectMeta: metav1.ObjectMeta{Name: "pod-5"}, Spec: corev1.PodSpec{}},
- {ObjectMeta: metav1.ObjectMeta{Name: "pod-6"}, Spec: corev1.PodSpec{}},
- {ObjectMeta: metav1.ObjectMeta{Name: "pod-7"}, Spec: corev1.PodSpec{}},
- {ObjectMeta: metav1.ObjectMeta{Name: "pod-8"}, Spec: corev1.PodSpec{}},
- {ObjectMeta: metav1.ObjectMeta{Name: "pod-9"}, Spec: corev1.PodSpec{}},
+ pods := make([]corev1.Pod, 13)
+ for i := range pods {
+ pods[i] = corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-" + strconv.Itoa(i)}, Spec: corev1.PodSpec{}}
}
nodeMap := map[string]*SolrNodeContents{
@@ -252,17 +245,25 @@
overseerLeader: false,
live: true,
},
- SolrNodeName(solrCloud, pods[9]): {
- nodeName: SolrNodeName(solrCloud, pods[9]),
- leaders: 3,
- replicas: 20,
- notDownReplicas: 9,
+ SolrNodeName(solrCloud, pods[10]): {
+ nodeName: SolrNodeName(solrCloud, pods[10]),
+ leaders: 0,
+ replicas: 0,
+ notDownReplicas: 0,
+ overseerLeader: false,
+ live: false,
+ },
+ SolrNodeName(solrCloud, pods[11]): {
+ nodeName: SolrNodeName(solrCloud, pods[11]),
+ leaders: 0,
+ replicas: 0,
+ notDownReplicas: 0,
overseerLeader: false,
live: true,
},
}
- expectedOrdering := []string{"pod-2", "pod-9", "pod-3", "pod-6", "pod-8", "pod-7", "pod-5", "pod-1", "pod-4", "pod-0"}
+ expectedOrdering := []string{"pod-12", "pod-9", "pod-10", "pod-6", "pod-1", "pod-11", "pod-2", "pod-3", "pod-8", "pod-7", "pod-5", "pod-4", "pod-0"}
sortNodePodsBySafety(pods, nodeMap, solrCloud)
foundOrdering := make([]string, len(pods))
@@ -434,28 +435,38 @@
},
}
- foundMaxPodsUnavailable, foundUnavailableUpdatedPodCount, foundMaxPodsToUpdate := calculateMaxPodsToUpdate(solrCloud, 10, 4, 4)
+ foundMaxPodsUnavailable, foundUnavailableUpdatedPodCount, foundMaxPodsToUpdate := calculateMaxPodsToUpdate(solrCloud, 10, 4, 0, 4)
assert.Equal(t, 2, foundMaxPodsUnavailable, "Incorrect value of maxPodsUnavailable given fromInt(2)")
assert.Equal(t, 2, foundUnavailableUpdatedPodCount, "Incorrect value of unavailableUpdatedPodCount")
assert.Equal(t, 0, foundMaxPodsToUpdate, "Incorrect value of maxPodsToUpdate")
- foundMaxPodsUnavailable, foundUnavailableUpdatedPodCount, foundMaxPodsToUpdate = calculateMaxPodsToUpdate(solrCloud, 10, 4, 3)
+ foundMaxPodsUnavailable, foundUnavailableUpdatedPodCount, foundMaxPodsToUpdate = calculateMaxPodsToUpdate(solrCloud, 10, 4, 0, 3)
assert.Equal(t, 2, foundMaxPodsUnavailable, "Incorrect value of maxPodsUnavailable given fromInt(2)")
assert.Equal(t, 3, foundUnavailableUpdatedPodCount, "Incorrect value of unavailableUpdatedPodCount")
assert.Equal(t, -1, foundMaxPodsToUpdate, "Incorrect value of maxPodsToUpdate")
+ foundMaxPodsUnavailable, foundUnavailableUpdatedPodCount, foundMaxPodsToUpdate = calculateMaxPodsToUpdate(solrCloud, 10, 3, 1, 3)
+ assert.Equal(t, 2, foundMaxPodsUnavailable, "Incorrect value of maxPodsUnavailable given fromInt(2)")
+ assert.Equal(t, 3, foundUnavailableUpdatedPodCount, "Incorrect value of unavailableUpdatedPodCount")
+ assert.Equal(t, -2, foundMaxPodsToUpdate, "Incorrect value of maxPodsToUpdate")
+
maxPodsUnavailable = intstr.FromString("45%")
- foundMaxPodsUnavailable, foundUnavailableUpdatedPodCount, foundMaxPodsToUpdate = calculateMaxPodsToUpdate(solrCloud, 10, 3, 5)
+ foundMaxPodsUnavailable, foundUnavailableUpdatedPodCount, foundMaxPodsToUpdate = calculateMaxPodsToUpdate(solrCloud, 10, 3, 0, 5)
assert.Equal(t, 4, foundMaxPodsUnavailable, "Incorrect value of maxPodsUnavailable given fromString(\"45%\")")
assert.Equal(t, 2, foundMaxPodsToUpdate, "Incorrect value of maxPodsToUpdate")
+ maxPodsUnavailable = intstr.FromString("45%")
+ foundMaxPodsUnavailable, foundUnavailableUpdatedPodCount, foundMaxPodsToUpdate = calculateMaxPodsToUpdate(solrCloud, 10, 1, 2, 5)
+ assert.Equal(t, 4, foundMaxPodsUnavailable, "Incorrect value of maxPodsUnavailable given fromString(\"45%\")")
+ assert.Equal(t, 0, foundMaxPodsToUpdate, "Incorrect value of maxPodsToUpdate")
+
maxPodsUnavailable = intstr.FromString("70%")
- foundMaxPodsUnavailable, foundUnavailableUpdatedPodCount, foundMaxPodsToUpdate = calculateMaxPodsToUpdate(solrCloud, 10, 3, 2)
+ foundMaxPodsUnavailable, foundUnavailableUpdatedPodCount, foundMaxPodsToUpdate = calculateMaxPodsToUpdate(solrCloud, 10, 3, 0, 2)
assert.Equal(t, 7, foundMaxPodsUnavailable, "Incorrect value of maxPodsUnavailable given fromString(\"70%\")")
assert.Equal(t, 2, foundMaxPodsToUpdate, "Incorrect value of maxPodsToUpdate")
solrCloud.Spec.UpdateStrategy.ManagedUpdateOptions.MaxPodsUnavailable = nil
- foundMaxPodsUnavailable, foundUnavailableUpdatedPodCount, foundMaxPodsToUpdate = calculateMaxPodsToUpdate(solrCloud, 10, 3, 2)
+ foundMaxPodsUnavailable, foundUnavailableUpdatedPodCount, foundMaxPodsToUpdate = calculateMaxPodsToUpdate(solrCloud, 10, 3, 0, 2)
assert.Equal(t, 2, foundMaxPodsUnavailable, "Incorrect value of maxPodsUnavailable given fromString(\"25%\")")
assert.Equal(t, -3, foundMaxPodsToUpdate, "Incorrect value of maxPodsToUpdate")
}
diff --git a/controllers/util/solr_util.go b/controllers/util/solr_util.go
index 1e17c6b..346ce5d 100644
--- a/controllers/util/solr_util.go
+++ b/controllers/util/solr_util.go
@@ -33,6 +33,8 @@
SolrClientPortName = "solr-client"
BackupRestoreVolume = "backup-restore"
+ SolrNodeContainer = "solrcloud-node"
+
SolrStorageFinalizer = "storage.finalizers.solr.apache.org"
SolrZKConnectionStringAnnotation = "solr.apache.org/zkConnectionString"
SolrPVCTechnologyLabel = "solr.apache.org/technology"
@@ -42,7 +44,7 @@
SolrPVCInstanceLabel = "solr.apache.org/instance"
SolrXmlMd5Annotation = "solr.apache.org/solrXmlMd5"
- DefaultStatefulSetPodManagementPolicy = appsv1.OrderedReadyPodManagement
+ DefaultStatefulSetPodManagementPolicy = appsv1.ParallelPodManagement
DefaultLivenessProbeInitialDelaySeconds = 20
DefaultLivenessProbeTimeoutSeconds = 1
@@ -365,7 +367,7 @@
containers := []corev1.Container{
{
- Name: "solrcloud-node",
+ Name: SolrNodeContainer,
Image: solrCloud.Spec.SolrImage.ToImageName(),
ImagePullPolicy: solrCloud.Spec.SolrImage.PullPolicy,
Ports: []corev1.ContainerPort{
diff --git a/docs/solr-cloud/managed-updates.md b/docs/solr-cloud/managed-updates.md
index 23726ac..e7b6f63 100644
--- a/docs/solr-cloud/managed-updates.md
+++ b/docs/solr-cloud/managed-updates.md
@@ -10,11 +10,17 @@
The logic goes as follows:
-1. Find the pods that are not up-to-date
+1. Find the pods that are out-of-date
+1. Update all out-of-date pods that do not have a started Solr container.
+ - This allows for updating a pod that cannot start, even if other pods are not available.
+ - This step does not respect the `maxPodsUnavailable` option, because these pods have not even started the Solr process.
1. Retrieve the cluster state of the SolrCloud if there are any `ready` pods.
- If no pods are ready, then there is no endpoint to retrieve the cluster state from.
1. Sort the pods in order of safety for being restarted. [Sorting order reference](#pod-update-sorting-order)
1. Iterate through the sorted pods, greedily choosing which pods to update. [Selection logic reference](#pod-update-selection-logic)
+ - The maximum number of pods that can be updated is determined by starting with `maxPodsUnavailable`,
+ then subtracting the number of updated pods that are unavailable as well as the number of not-yet-started, out-of-date pods that were updated in a previous step.
+ This check makes sure that any pods taken down during this step do not violate the `maxPodsUnavailable` constraint.
### Pod Update Sorting Order
diff --git a/helm/solr-operator/crds/crds.yaml b/helm/solr-operator/crds/crds.yaml
index a5066f7..678c196 100644
--- a/helm/solr-operator/crds/crds.yaml
+++ b/helm/solr-operator/crds/crds.yaml
@@ -5403,7 +5403,7 @@
description: Labels to be added for the StatefulSet.
type: object
podManagementPolicy:
- description: PodManagementPolicy defines the policy for creating pods under a stateful set. Override the default value of OrderedReady. This cannot be updated on an existing StatefulSet, the StatefulSet must be deleted and recreated for a change in this field to take effect.
+ description: PodManagementPolicy defines the policy for creating pods under a stateful set. Override the default value of Parallel. This cannot be updated on an existing StatefulSet, the StatefulSet must be deleted and recreated for a change in this field to take effect.
enum:
- OrderedReady
- Parallel