Add in a retry queue for clusterOps (#596)
diff --git a/controllers/solr_cluster_ops_util.go b/controllers/solr_cluster_ops_util.go
index c642dc3..96859a0 100644
--- a/controllers/solr_cluster_ops_util.go
+++ b/controllers/solr_cluster_ops_util.go
@@ -19,6 +19,7 @@
import (
"context"
+ "encoding/json"
"errors"
solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
"github.com/apache/solr-operator/controllers/util"
@@ -26,6 +27,7 @@
"github.com/go-logr/logr"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/pointer"
"net/url"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -33,7 +35,116 @@
"time"
)
-func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (clusterLockAcquired bool, retryLaterDuration time.Duration, err error) {
+// SolrClusterOp contains metadata for cluster operations performed on SolrClouds.
+type SolrClusterOp struct {
+ // The type of Cluster Operation
+ Operation SolrClusterOperationType `json:"operation"`
+
+ // Time that the Cluster Operation was started or re-started
+ LastStartTime metav1.Time `json:"lastStartTime"`
+
+ // Time that the Cluster Operation was started or re-started
+ Metadata string `json:"metadata"`
+}
+type SolrClusterOperationType string
+
+const (
+ ScaleDownLock SolrClusterOperationType = "ScalingDown"
+ ScaleUpLock SolrClusterOperationType = "ScalingUp"
+ UpdateLock SolrClusterOperationType = "RollingUpdate"
+)
+
+func clearClusterOpLock(statefulSet *appsv1.StatefulSet) {
+ delete(statefulSet.Annotations, util.ClusterOpsLockAnnotation)
+}
+
+func setClusterOpLock(statefulSet *appsv1.StatefulSet, op SolrClusterOp) error {
+ bytes, err := json.Marshal(op)
+ if err != nil {
+ return err
+ }
+ statefulSet.Annotations[util.ClusterOpsLockAnnotation] = string(bytes)
+ return nil
+}
+
+func setClusterOpRetryQueue(statefulSet *appsv1.StatefulSet, queue []SolrClusterOp) error {
+ if len(queue) > 0 {
+ bytes, err := json.Marshal(queue)
+ if err != nil {
+ return err
+ }
+ statefulSet.Annotations[util.ClusterOpsRetryQueueAnnotation] = string(bytes)
+ } else {
+ delete(statefulSet.Annotations, util.ClusterOpsRetryQueueAnnotation)
+ }
+ return nil
+}
+
+func GetCurrentClusterOp(statefulSet *appsv1.StatefulSet) (clusterOp *SolrClusterOp, err error) {
+ if op, hasOp := statefulSet.Annotations[util.ClusterOpsLockAnnotation]; hasOp {
+ clusterOp = &SolrClusterOp{}
+ err = json.Unmarshal([]byte(op), clusterOp)
+ }
+ return
+}
+
+func GetClusterOpRetryQueue(statefulSet *appsv1.StatefulSet) (clusterOpQueue []SolrClusterOp, err error) {
+ if op, hasOp := statefulSet.Annotations[util.ClusterOpsRetryQueueAnnotation]; hasOp {
+ err = json.Unmarshal([]byte(op), &clusterOpQueue)
+ }
+ return
+}
+
+func enqueueCurrentClusterOpForRetry(statefulSet *appsv1.StatefulSet) (hasOp bool, err error) {
+ clusterOp, err := GetCurrentClusterOp(statefulSet)
+ if err != nil || clusterOp == nil {
+ return false, err
+ }
+ clusterOpRetryQueue, err := GetClusterOpRetryQueue(statefulSet)
+ if err != nil {
+ return true, err
+ }
+ clusterOpRetryQueue = append(clusterOpRetryQueue, *clusterOp)
+ clearClusterOpLock(statefulSet)
+ return true, setClusterOpRetryQueue(statefulSet, clusterOpRetryQueue)
+}
+
+func retryNextQueuedClusterOp(statefulSet *appsv1.StatefulSet) (hasOp bool, err error) {
+ clusterOpRetryQueue, err := GetClusterOpRetryQueue(statefulSet)
+ if err != nil {
+ return hasOp, err
+ }
+ hasOp = len(clusterOpRetryQueue) > 0
+ if len(clusterOpRetryQueue) > 0 {
+ nextOp := clusterOpRetryQueue[0]
+ nextOp.LastStartTime = metav1.Now()
+ err = setClusterOpLock(statefulSet, nextOp)
+ if err != nil {
+ return hasOp, err
+ }
+ err = setClusterOpRetryQueue(statefulSet, clusterOpRetryQueue[1:])
+ }
+ return hasOp, err
+}
+
+func retryNextQueuedClusterOpWithQueue(statefulSet *appsv1.StatefulSet, clusterOpQueue []SolrClusterOp) (hasOp bool, err error) {
+ if err != nil {
+ return hasOp, err
+ }
+ hasOp = len(clusterOpQueue) > 0
+ if len(clusterOpQueue) > 0 {
+ nextOp := clusterOpQueue[0]
+ nextOp.LastStartTime = metav1.Now()
+ err = setClusterOpLock(statefulSet, nextOp)
+ if err != nil {
+ return hasOp, err
+ }
+ err = setClusterOpRetryQueue(statefulSet, clusterOpQueue[1:])
+ }
+ return hasOp, err
+}
+
+func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleDownOpIsQueued bool, podList []corev1.Pod, logger logr.Logger) (clusterOp *SolrClusterOp, retryLaterDuration time.Duration, err error) {
desiredPods := int(*instance.Spec.Replicas)
configuredPods := int(*statefulSet.Spec.Replicas)
if desiredPods != configuredPods {
@@ -45,61 +156,63 @@
if len(podList) > configuredPods {
// There are too many pods, the statefulSet controller has yet to delete unwanted pods.
// Do not start the scale down until these extra pods are deleted.
- return false, time.Second * 5, nil
+ return nil, time.Second * 5, nil
}
-
- // Managed Scale down!
- originalStatefulSet := statefulSet.DeepCopy()
- statefulSet.Annotations[util.ClusterOpsLockAnnotation] = util.ScaleDownLock
- // The scaleDown metadata is the number of nodes to scale down to.
- // We only support scaling down one pod at-a-time when using a managed scale-down.
- // If the user wishes to scale down by multiple nodes, this ClusterOp will be done once-per-node.
- statefulSet.Annotations[util.ClusterOpsMetadataAnnotation] = strconv.Itoa(configuredPods - 1)
- if err = r.Patch(ctx, statefulSet, client.StrategicMergeFrom(originalStatefulSet)); err != nil {
- logger.Error(err, "Error while patching StatefulSet to start clusterOp", "clusterOp", util.ScaleDownLock, "clusterOpMetadata", configuredPods-1)
- } else {
- clusterLockAcquired = true
+ clusterOp = &SolrClusterOp{
+ Operation: ScaleDownLock,
+ Metadata: strconv.Itoa(configuredPods - 1),
}
} else if desiredPods > configuredPods && (instance.Spec.Scaling.PopulatePodsOnScaleUp == nil || *instance.Spec.Scaling.PopulatePodsOnScaleUp) {
if len(podList) < configuredPods {
// There are not enough pods, the statefulSet controller has yet to create the previously desired pods.
// Do not start the scale up until these missing pods are created.
- return false, time.Second * 5, nil
+ return nil, time.Second * 5, nil
}
- // Managed Scale up!
- originalStatefulSet := statefulSet.DeepCopy()
- statefulSet.Annotations[util.ClusterOpsLockAnnotation] = util.ScaleUpLock
- // The scaleUp metadata is the number of nodes that existed before the scaleUp.
- // This allows the scaleUp operation to know which pods will be empty after the statefulSet is scaledUp.
- statefulSet.Annotations[util.ClusterOpsMetadataAnnotation] = strconv.Itoa(configuredPods)
- // We want to set the number of replicas at the beginning of the scaleUp operation
- statefulSet.Spec.Replicas = pointer.Int32(int32(desiredPods))
- if err = r.Patch(ctx, statefulSet, client.StrategicMergeFrom(originalStatefulSet)); err != nil {
- logger.Error(err, "Error while patching StatefulSet to start clusterOp", "clusterOp", util.ScaleUpLock, "clusterOpMetadata", configuredPods, "newStatefulSetSize", desiredPods)
- } else {
- clusterLockAcquired = true
+ clusterOp = &SolrClusterOp{
+ Operation: ScaleUpLock,
+ Metadata: strconv.Itoa(desiredPods),
}
} else {
err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger)
}
+ } else if scaleDownOpIsQueued {
+ // If the statefulSet and the solrCloud have the same number of pods configured, and the queued operation is a scaleDown,
+ // that means the scaleDown was reverted. So there's no reason to change the number of pods.
+ // However, a Replica Balancing should be done just in case, so do a ScaleUp, but don't change the number of pods.
+ clusterOp = &SolrClusterOp{
+ Operation: ScaleUpLock,
+ Metadata: strconv.Itoa(desiredPods),
+ }
}
return
}
// handleManagedCloudScaleDown does the logic of a managed and "locked" cloud scale down operation.
// This will likely take many reconcile loops to complete, as it is moving replicas away from the pods that will be scaled down.
-func handleManagedCloudScaleDown(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleDownToRaw string, podList []corev1.Pod, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
+func handleManagedCloudScaleDown(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, clusterOp *SolrClusterOp, podList []corev1.Pod, logger logr.Logger) (operationComplete bool, requestInProgress bool, retryLaterDuration time.Duration, err error) {
var scaleDownTo int
- if scaleDownTo, err = strconv.Atoi(scaleDownToRaw); err != nil {
- logger.Error(err, "Could not convert statefulSet annotation to int for scale-down-to information", "annotation", util.ClusterOpsMetadataAnnotation, "value", scaleDownToRaw)
+ if scaleDownTo, err = strconv.Atoi(clusterOp.Metadata); err != nil {
+ logger.Error(err, "Could not convert ScaleDown metadata to int, as it represents the number of nodes to scale to", "metadata", clusterOp.Metadata)
return
// TODO: Create event for the CRD.
}
- if scaleDownTo >= int(*statefulSet.Spec.Replicas) {
+ if len(podList) <= scaleDownTo {
+ // The number of pods is less than we are trying to scaleDown to, so we are done
+ return true, false, 0, nil
+ }
+ if int(*statefulSet.Spec.Replicas) <= scaleDownTo {
+ // We've done everything we need to do at this point. We just need to wait until the pods are deleted to be "done".
+ // So return and wait for the next reconcile loop, whenever it happens
+ return false, false, time.Second, nil
+ }
+ // TODO: It would be great to support a multi-node scale down when Solr supports evicting many SolrNodes at once.
+ if int(*statefulSet.Spec.Replicas) > scaleDownTo+1 {
// This shouldn't happen, but we don't want to be stuck if it does.
- // Just remove the cluster Op, because the cluster has already been scaled down.
- err = clearClusterOp(ctx, r, statefulSet, "statefulSet already scaled-down", logger)
+ // Just remove the cluster Op, because the cluster is bigger than it should be.
+ // We will retry the whole thing again, with the right metadata this time
+ operationComplete = true
+ return true, false, time.Second, nil
}
// Before doing anything to the pod, make sure that users cannot send requests to the pod anymore.
@@ -111,62 +224,75 @@
},
}
- // TODO: It would be great to support a multi-node scale down when Solr supports evicting many SolrNodes at once.
// Only evict the last pod, even if we are trying to scale down multiple pods.
// Scale down will happen one pod at a time.
- if replicaManagementComplete, evictErr := evictSinglePod(ctx, r, instance, scaleDownTo, podList, podStoppedReadinessConditions, logger); err != nil {
- err = evictErr
- } else if replicaManagementComplete {
- originalStatefulSet := statefulSet.DeepCopy()
- statefulSet.Spec.Replicas = pointer.Int32(int32(scaleDownTo))
- delete(statefulSet.Annotations, util.ClusterOpsLockAnnotation)
- delete(statefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
- if err = r.Patch(ctx, statefulSet, client.StrategicMergeFrom(originalStatefulSet)); err != nil {
- logger.Error(err, "Error while patching StatefulSet to finish the managed SolrCloud scale down clusterOp", "newStatefulSetReplicas", scaleDownTo)
+ var replicaManagementComplete bool
+ if replicaManagementComplete, requestInProgress, err = evictSinglePod(ctx, r, instance, scaleDownTo, podList, podStoppedReadinessConditions, logger); err == nil {
+ if replicaManagementComplete {
+ originalStatefulSet := statefulSet.DeepCopy()
+ statefulSet.Spec.Replicas = pointer.Int32(int32(scaleDownTo))
+ if err = r.Patch(ctx, statefulSet, client.StrategicMergeFrom(originalStatefulSet)); err != nil {
+ logger.Error(err, "Error while patching StatefulSet to scale down pods after eviction", "newStatefulSetReplicas", scaleDownTo)
+ }
+ // Return and wait for the pods to be created, which will call another reconcile
+ retryLaterDuration = 0
+ } else {
+ // Retry after five seconds to check if the replica management commands have been completed
+ retryLaterDuration = time.Second * 5
}
-
- // TODO: Create event for the CRD.
- } else {
- // Retry after five seconds to check if the replica management commands have been completed
- retryLaterDuration = time.Second * 5
}
return
}
// handleManagedCloudScaleUp does the logic of a managed and "locked" cloud scale up operation.
// This will likely take many reconcile loops to complete, as it is moving replicas to the pods that have recently been scaled up.
-func handleManagedCloudScaleUp(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, scaleUpFromRaw string, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
- // TODO: Think about bad pod specs, that will never come up healthy. We want to try a rolling restart in between if necessary
- if balanceComplete, balanceErr := util.BalanceReplicasForCluster(ctx, instance, statefulSet, "scaleUp", scaleUpFromRaw, logger); err != nil {
- err = balanceErr
- } else if balanceComplete {
- // Once the replica balancing is complete, finish the cluster operation by deleting the statefulSet annotations
+func handleManagedCloudScaleUp(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, clusterOp *SolrClusterOp, podList []corev1.Pod, logger logr.Logger) (operationComplete bool, requestInProgress bool, retryLaterDuration time.Duration, err error) {
+ desiredPods, err := strconv.Atoi(clusterOp.Metadata)
+ if err != nil {
+ logger.Error(err, "Could not convert ScaleUp metadata to int, as it represents the number of nodes to scale to", "metadata", clusterOp.Metadata)
+ return
+ }
+ configuredPods := int(*statefulSet.Spec.Replicas)
+ if configuredPods < desiredPods {
+ // The first thing to do is increase the number of pods the statefulSet is running
originalStatefulSet := statefulSet.DeepCopy()
- delete(statefulSet.Annotations, util.ClusterOpsLockAnnotation)
- delete(statefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
- if err = r.Patch(ctx, statefulSet, client.StrategicMergeFrom(originalStatefulSet)); err != nil {
- logger.Error(err, "Error while patching StatefulSet to finish the managed SolrCloud scale up clusterOp")
- }
+ statefulSet.Spec.Replicas = pointer.Int32(int32(desiredPods))
- // TODO: Create event for the CRD.
+ err = r.Patch(ctx, statefulSet, client.StrategicMergeFrom(originalStatefulSet))
+ if err != nil {
+ logger.Error(err, "Error while patching StatefulSet to increase the number of pods for the ScaleUp")
+ }
+ // Return and wait for the pods to be created, which will call another reconcile
+ return false, false, 0, err
} else {
- // Retry after five seconds to check if the replica management commands have been completed
- retryLaterDuration = time.Second * 5
+ // Before doing anything to the pod, make sure that the pods do not have a stopped readiness condition
+ readinessConditions := map[corev1.PodConditionType]podReadinessConditionChange{
+ util.SolrIsNotStoppedReadinessCondition: {
+ reason: PodStarted,
+ message: "Pod is not being deleted, traffic to the pod must be started",
+ status: true,
+ },
+ }
+ for _, pod := range podList {
+ if updatedPod, e := EnsurePodReadinessConditions(ctx, r, &pod, readinessConditions, logger); e != nil {
+ err = e
+ return
+ } else {
+ pod = *updatedPod
+ }
+ }
+ if operationComplete, requestInProgress, err = util.BalanceReplicasForCluster(ctx, instance, statefulSet, "scaleUp", clusterOp.Metadata, logger); !operationComplete && err == nil {
+ // Retry after five seconds to check if the replica management commands have been completed
+ retryLaterDuration = time.Second * 5
+ }
}
return
}
-func determineRollingUpdateClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, outOfDatePods util.OutOfDatePodSegmentation, logger logr.Logger) (clusterLockAcquired bool, retryLaterDuration time.Duration, err error) {
+func determineRollingUpdateClusterOpLockIfNecessary(instance *solrv1beta1.SolrCloud, outOfDatePods util.OutOfDatePodSegmentation) (clusterOp *SolrClusterOp, retryLaterDuration time.Duration, err error) {
if instance.Spec.UpdateStrategy.Method == solrv1beta1.ManagedUpdate && !outOfDatePods.IsEmpty() {
- // Managed Rolling Upgrade!
- originalStatefulSet := statefulSet.DeepCopy()
- statefulSet.Annotations[util.ClusterOpsLockAnnotation] = util.UpdateLock
- // No rolling update metadata is currently required
- statefulSet.Annotations[util.ClusterOpsMetadataAnnotation] = ""
- if err = r.Patch(ctx, statefulSet, client.StrategicMergeFrom(originalStatefulSet)); err != nil {
- logger.Error(err, "Error while patching StatefulSet to start clusterOp", "clusterOp", util.UpdateLock, "clusterOpMetadata", "")
- } else {
- clusterLockAcquired = true
+ clusterOp = &SolrClusterOp{
+ Operation: UpdateLock,
}
}
return
@@ -174,21 +300,19 @@
// handleManagedCloudRollingUpdate does the logic of a managed and "locked" cloud rolling update operation.
// This will take many reconcile loops to complete, as it is deleting pods/moving replicas.
-func handleManagedCloudRollingUpdate(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, outOfDatePods util.OutOfDatePodSegmentation, hasReadyPod bool, availableUpdatedPodCount int, logger logr.Logger) (retryLaterDuration time.Duration, err error) {
+func handleManagedCloudRollingUpdate(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, outOfDatePods util.OutOfDatePodSegmentation, hasReadyPod bool, availableUpdatedPodCount int, logger logr.Logger) (operationComplete bool, requestInProgress bool, retryLaterDuration time.Duration, err error) {
// Manage the updating of out-of-spec pods, if the Managed UpdateStrategy has been specified.
updateLogger := logger.WithName("ManagedUpdateSelector")
- // First check if all pods are up to date. If so the rolling update is complete
- if outOfDatePods.IsEmpty() {
- // Once the rolling update is complete, finish the cluster operation by deleting the statefulSet annotations
- originalStatefulSet := statefulSet.DeepCopy()
- delete(statefulSet.Annotations, util.ClusterOpsLockAnnotation)
- delete(statefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
- if err = r.Patch(ctx, statefulSet, client.StrategicMergeFrom(originalStatefulSet)); err != nil {
- logger.Error(err, "Error while patching StatefulSet to finish the managed SolrCloud rollingUpdate clusterOp")
- }
-
- // TODO: Create event for the CRD.
+ // First check if all pods are up to date and ready. If so the rolling update is complete
+ configuredPods := int(*statefulSet.Spec.Replicas)
+ if configuredPods == availableUpdatedPodCount {
+ // The configured number of pods are all healthy and up to date. The operation is complete
+ operationComplete = true
+ return
+ } else if outOfDatePods.IsEmpty() {
+ // Just return and wait for the updated pods to come up healthy, these will call new reconciles, so there is nothing for us to do
+ return
} else {
// The out of date pods that have not been started, should all be updated immediately.
// There is no use "safely" updating pods which have not been started yet.
@@ -211,7 +335,8 @@
// Only actually delete a running pod if it has been evicted, or doesn't need eviction (persistent storage)
for _, pod := range podsToUpdate {
- retryLaterDurationTemp, errTemp := DeletePodForUpdate(ctx, r, instance, &pod, podsHaveReplicas[pod.Name], updateLogger)
+ retryLaterDurationTemp, inProgTmp, errTemp := DeletePodForUpdate(ctx, r, instance, &pod, podsHaveReplicas[pod.Name], updateLogger)
+ requestInProgress = requestInProgress || inProgTmp
// Use the retryLaterDuration of the pod that requires a retry the soonest (smallest duration > 0)
if retryLaterDurationTemp > 0 && (retryLaterDurationTemp < retryLaterDuration || retryLaterDuration == 0) {
@@ -221,7 +346,6 @@
err = errTemp
}
}
-
if retryLater && retryLaterDuration == 0 {
retryLaterDuration = time.Second * 10
}
@@ -229,21 +353,50 @@
return
}
-// clearClusterOp simply removes any clusterOp for the given statefulSet.
-// This should only be used as a "break-glass" scenario. Do not use this to finish off successful clusterOps.
-func clearClusterOp(ctx context.Context, r *SolrCloudReconciler, statefulSet *appsv1.StatefulSet, reason string, logger logr.Logger) (err error) {
- logger = logger.WithValues("reason", reason, "clusterOp", statefulSet.Annotations[util.ClusterOpsLockAnnotation], "clusterOpMetadata", statefulSet.Annotations[util.ClusterOpsMetadataAnnotation])
+// clearClusterOpLockWithPatch simply removes any clusterOp for the given statefulSet.
+func clearClusterOpLockWithPatch(ctx context.Context, r *SolrCloudReconciler, statefulSet *appsv1.StatefulSet, reason string, logger logr.Logger) (err error) {
originalStatefulSet := statefulSet.DeepCopy()
- delete(statefulSet.Annotations, util.ClusterOpsLockAnnotation)
- delete(statefulSet.Annotations, util.ClusterOpsMetadataAnnotation)
+ clearClusterOpLock(statefulSet)
if err = r.Patch(ctx, statefulSet, client.StrategicMergeFrom(originalStatefulSet)); err != nil {
- logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterLockOp annotation")
+ logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterOpLock annotation", "reason", reason)
} else {
- logger.Error(err, "Removed unneeded clusterLockOp annotation from statefulSet")
+ logger.Info("Removed unneeded clusterOpLock annotation from statefulSet", "reason", reason)
}
return
}
+// enqueueCurrentClusterOpForRetryWithPatch adds the current clusterOp to the clusterOpRetryQueue, and clears the current cluster Op.
+// This method will send the StatefulSet patch to the API Server.
+func enqueueCurrentClusterOpForRetryWithPatch(ctx context.Context, r *SolrCloudReconciler, statefulSet *appsv1.StatefulSet, reason string, logger logr.Logger) (err error) {
+ originalStatefulSet := statefulSet.DeepCopy()
+ hasOp, err := enqueueCurrentClusterOpForRetry(statefulSet)
+ if hasOp && err == nil {
+ err = r.Patch(ctx, statefulSet, client.StrategicMergeFrom(originalStatefulSet))
+ }
+ if err != nil {
+ logger.Error(err, "Error while patching StatefulSet to enqueue clusterOp for retry", "reason", reason)
+ } else if hasOp {
+ logger.Info("Enqueued current clusterOp to continue later", "reason", reason)
+ }
+ return err
+}
+
+// retryNextQueuedClusterOpWithPatch removes the first clusterOp from the clusterOpRetryQueue, and sets it as the current cluster Op.
+// This method will send the StatefulSet patch to the API Server.
+func retryNextQueuedClusterOpWithPatch(ctx context.Context, r *SolrCloudReconciler, statefulSet *appsv1.StatefulSet, clusterOpQueue []SolrClusterOp, logger logr.Logger) (err error) {
+ originalStatefulSet := statefulSet.DeepCopy()
+ hasOp, err := retryNextQueuedClusterOpWithQueue(statefulSet, clusterOpQueue)
+ if hasOp && err == nil {
+ err = r.Patch(ctx, statefulSet, client.StrategicMergeFrom(originalStatefulSet))
+ }
+ if err != nil {
+ logger.Error(err, "Error while patching StatefulSet to retry next queued clusterOp")
+ } else if hasOp {
+ logger.Info("Retrying next queued clusterOp")
+ }
+ return err
+}
+
// scaleCloudUnmanaged does simple scaling of a SolrCloud without moving replicas.
// This is not a "locked" cluster operation, and does not block other cluster operations from taking place.
func scaleCloudUnmanaged(ctx context.Context, r *SolrCloudReconciler, statefulSet *appsv1.StatefulSet, scaleTo int, logger logr.Logger) (err error) {
@@ -283,7 +436,7 @@
return
}
-func evictSinglePod(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, scaleDownTo int, podList []corev1.Pod, readinessConditions map[corev1.PodConditionType]podReadinessConditionChange, logger logr.Logger) (podIsEmpty bool, err error) {
+func evictSinglePod(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, scaleDownTo int, podList []corev1.Pod, readinessConditions map[corev1.PodConditionType]podReadinessConditionChange, logger logr.Logger) (podIsEmpty bool, requestInProgress bool, err error) {
var pod *corev1.Pod
podName := instance.GetSolrPodName(scaleDownTo)
for _, p := range podList {
@@ -295,14 +448,14 @@
podHasReplicas := true
if replicas, e := getReplicasForPod(ctx, instance, podName, logger); e != nil {
- return false, e
+ return false, false, e
} else {
podHasReplicas = len(replicas) > 0
}
// The pod doesn't exist, we cannot empty it
if pod == nil {
- return !podHasReplicas, errors.New("Could not find pod " + podName + " when trying to migrate replicas to scale down pod.")
+ return !podHasReplicas, false, errors.New("Could not find pod " + podName + " when trying to migrate replicas to scale down pod.")
}
if updatedPod, e := EnsurePodReadinessConditions(ctx, r, pod, readinessConditions, logger); e != nil {
@@ -313,8 +466,8 @@
}
// Only evict from the pod if it contains replicas in the clusterState
- if e, canDeletePod := util.EvictReplicasForPodIfNecessary(ctx, instance, pod, podHasReplicas, "scaleDown", logger); e != nil {
- err = e
+ var canDeletePod bool
+ if err, canDeletePod, requestInProgress = util.EvictReplicasForPodIfNecessary(ctx, instance, pod, podHasReplicas, "scaleDown", logger); err != nil {
logger.Error(err, "Error while evicting replicas on Pod, when scaling down SolrCloud", "pod", pod.Name)
} else if canDeletePod {
// The pod previously had replicas, so loop back in the next reconcile to make sure that the pod doesn't
diff --git a/controllers/solr_pod_lifecycle_util.go b/controllers/solr_pod_lifecycle_util.go
index 203fc13..8af455b 100644
--- a/controllers/solr_pod_lifecycle_util.go
+++ b/controllers/solr_pod_lifecycle_util.go
@@ -46,7 +46,7 @@
ScaleDown PodConditionChangeReason = "ScaleDown"
)
-func DeletePodForUpdate(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, pod *corev1.Pod, podHasReplicas bool, logger logr.Logger) (requeueAfterDuration time.Duration, err error) {
+func DeletePodForUpdate(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, pod *corev1.Pod, podHasReplicas bool, logger logr.Logger) (requeueAfterDuration time.Duration, requestInProgress bool, err error) {
// Before doing anything to the pod, make sure that users cannot send requests to the pod anymore.
ps := PodStarted
podStoppedReadinessConditions := map[corev1.PodConditionType]podReadinessConditionChange{
@@ -75,10 +75,12 @@
deletePod := false
if PodConditionEquals(pod, util.SolrReplicasNotEvictedReadinessCondition, EvictingReplicas) {
// Only evict pods that contain replicas in the clusterState
- if evictError, canDeletePod := util.EvictReplicasForPodIfNecessary(ctx, instance, pod, podHasReplicas, "podUpdate", logger); evictError != nil {
+ if evictError, canDeletePod, inProgTmp := util.EvictReplicasForPodIfNecessary(ctx, instance, pod, podHasReplicas, "podUpdate", logger); evictError != nil {
+ requestInProgress = true
err = evictError
logger.Error(err, "Error while evicting replicas on pod", "pod", pod.Name)
} else if canDeletePod {
+ requestInProgress = inProgTmp
if podHasReplicas {
// The pod previously had replicas, so loop back in the next reconcile to make sure that the pod doesn't
// have replicas anymore even if the previous evict command was successful.
@@ -88,6 +90,7 @@
deletePod = true
}
} else {
+ requestInProgress = inProgTmp
// Try again in 5 seconds if we cannot delete a pod.
requeueAfterDuration = time.Second * 5
}
diff --git a/controllers/solrcloud_controller.go b/controllers/solrcloud_controller.go
index 56fdbd7..100522f 100644
--- a/controllers/solrcloud_controller.go
+++ b/controllers/solrcloud_controller.go
@@ -453,37 +453,115 @@
// Update or Scale, one-at-a-time. We do not want to do both.
hasReadyPod := newStatus.ReadyReplicas > 0
var retryLaterDuration time.Duration
- if clusterOpLock, hasAnn := statefulSet.Annotations[util.ClusterOpsLockAnnotation]; hasAnn {
- clusterOpMetadata := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]
- switch clusterOpLock {
- case util.UpdateLock:
- retryLaterDuration, err = handleManagedCloudRollingUpdate(ctx, r, instance, statefulSet, outOfDatePods, hasReadyPod, availableUpdatedPodCount, logger)
- case util.ScaleDownLock:
- retryLaterDuration, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, clusterOpMetadata, podList, logger)
- case util.ScaleUpLock:
- retryLaterDuration, err = handleManagedCloudScaleUp(ctx, r, instance, statefulSet, clusterOpMetadata, logger)
+ if clusterOp, opErr := GetCurrentClusterOp(statefulSet); clusterOp != nil && opErr == nil {
+ var operationComplete, requestInProgress bool
+ operationFound := true
+ shortTimeoutForRequeue := true
+ switch clusterOp.Operation {
+ case UpdateLock:
+ operationComplete, requestInProgress, retryLaterDuration, err = handleManagedCloudRollingUpdate(ctx, r, instance, statefulSet, outOfDatePods, hasReadyPod, availableUpdatedPodCount, logger)
+ // Rolling Updates should not be requeued quickly. The operation is expected to take a long time and thus should have a longTimeout if errors are not seen.
+ shortTimeoutForRequeue = false
+ case ScaleDownLock:
+ operationComplete, requestInProgress, retryLaterDuration, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, clusterOp, podList, logger)
+ case ScaleUpLock:
+ operationComplete, requestInProgress, retryLaterDuration, err = handleManagedCloudScaleUp(ctx, r, instance, statefulSet, clusterOp, podList, logger)
default:
+ operationFound = false
// This shouldn't happen, but we don't want to be stuck if it does.
// Just remove the cluster Op, because the solr operator version running does not support it.
- err = clearClusterOp(ctx, r, statefulSet, "clusterOp not supported", logger)
+ err = clearClusterOpLockWithPatch(ctx, r, statefulSet, "clusterOp not supported", logger)
+ }
+ if operationFound {
+ if operationComplete {
+ // Once the operation is complete, finish the cluster operation by deleting the statefulSet annotations
+ err = clearClusterOpLockWithPatch(ctx, r, statefulSet, string(clusterOp.Operation)+" complete", logger)
+
+ // TODO: Create event for the CRD.
+ } else if !requestInProgress {
+ // If the cluster operation is in a stoppable place (not currently doing an async operation), and either:
+ // - the operation hit an error and has taken more than 1 minute
+ // - the operation has a short timeout and has taken more than 1 minute
+ // - the operation has a long timeout and has taken more than 10 minutes
+ // then continue the operation later.
+ // (it will likely immediately continue, since it is unlikely there is another operation to run)
+ clusterOpRuntime := time.Since(clusterOp.LastStartTime.Time)
+ queueForLaterReason := ""
+ if err != nil && clusterOpRuntime > time.Minute {
+ queueForLaterReason = "hit an error during operation"
+ } else if shortTimeoutForRequeue && clusterOpRuntime > time.Minute {
+ queueForLaterReason = "timed out during operation (1 minutes)"
+ } else if clusterOpRuntime > time.Minute*10 {
+ queueForLaterReason = "timed out during operation (10 minutes)"
+ }
+ if queueForLaterReason != "" {
+ err = enqueueCurrentClusterOpForRetryWithPatch(ctx, r, statefulSet, string(clusterOp.Operation)+" "+queueForLaterReason, logger)
+
+ // TODO: Create event for the CRD.
+ }
+ }
+ }
+ } else if opErr == nil {
+ if clusterOpQueue, opErr := GetClusterOpRetryQueue(statefulSet); opErr == nil {
+ queuedRetryOps := map[SolrClusterOperationType]int{}
+
+ for i, op := range clusterOpQueue {
+ queuedRetryOps[op.Operation] = i
+ }
+ // Start cluster operations if needed.
+ // The operations will be actually run in future reconcile loops, but a clusterOpLock will be acquired here.
+ // And that lock will tell future reconcile loops that the operation needs to be done.
+ clusterOp, retryLaterDuration, err = determineRollingUpdateClusterOpLockIfNecessary(instance, outOfDatePods)
+ // If the new clusterOperation is an update to a queued clusterOp, just change the operation that is already queued
+ if queueIdx, opIsQueued := queuedRetryOps[UpdateLock]; clusterOp != nil && opIsQueued {
+ clusterOpQueue[queueIdx] = *clusterOp
+ clusterOp = nil
+ }
+
+ // If a non-managed scale needs to take place, this method will update the StatefulSet without starting
+ // a "locked" cluster operation
+ if clusterOp == nil {
+ _, scaleDownOpIsQueued := queuedRetryOps[ScaleDownLock]
+ clusterOp, retryLaterDuration, err = determineScaleClusterOpLockIfNecessary(ctx, r, instance, statefulSet, scaleDownOpIsQueued, podList, logger)
+
+ // If the new clusterOperation is an update to a queued clusterOp, just change the operation that is already queued
+ if clusterOp != nil {
+ // Only one of ScaleUp or ScaleDown can be queued at one time
+ if queueIdx, opIsQueued := queuedRetryOps[ScaleDownLock]; opIsQueued {
+ clusterOpQueue[queueIdx] = *clusterOp
+ clusterOp = nil
+ }
+ if queueIdx, opIsQueued := queuedRetryOps[ScaleUpLock]; opIsQueued {
+ clusterOpQueue[queueIdx] = *clusterOp
+ clusterOp = nil
+ }
+ }
+ }
+
+ if clusterOp != nil {
+ // Starting a locked cluster operation!
+ originalStatefulSet := statefulSet.DeepCopy()
+ clusterOp.LastStartTime = metav1.Now()
+ err = setClusterOpLock(statefulSet, *clusterOp)
+ if err == nil {
+ err = r.Patch(ctx, statefulSet, client.StrategicMergeFrom(originalStatefulSet))
+ }
+ if err != nil {
+ logger.Error(err, "Error while patching StatefulSet to start locked clusterOp", clusterOp.Operation, "clusterOpMetadata", clusterOp.Metadata)
+ } else {
+ logger.Info("Started locked clusterOp", "clusterOp", clusterOp.Operation, "clusterOpMetadata", clusterOp.Metadata)
+ }
+ } else {
+ // No new clusterOperation has been started, retry the next queued clusterOp, if there are any operations in the retry queue.
+ err = retryNextQueuedClusterOpWithPatch(ctx, r, statefulSet, clusterOpQueue, logger)
+ }
+
+ // After a lock is acquired, the reconcile will be started again because the StatefulSet is being watched for changes
+ } else {
+ err = opErr
}
} else {
- lockAcquired := false
- // Start cluster operations if needed.
- // The operations will be actually run in future reconcile loops, but a clusterOpLock will be acquired here.
- // And that lock will tell future reconcile loops that the operation needs to be done.
- // If a non-managed scale needs to take place, this method will update the StatefulSet without starting
- // a "locked" cluster operation
- lockAcquired, retryLaterDuration, err = determineRollingUpdateClusterOpLockIfNecessary(ctx, r, instance, statefulSet, outOfDatePods, logger)
- // Start cluster operations if needed.
- // The operations will be actually run in future reconcile loops, but a clusterOpLock will be acquired here.
- // And that lock will tell future reconcile loops that the operation needs to be done.
- // If a non-managed scale needs to take place, this method will update the StatefulSet without starting
- // a "locked" cluster operation
- if !lockAcquired {
- lockAcquired, retryLaterDuration, err = determineScaleClusterOpLockIfNecessary(ctx, r, instance, statefulSet, podList, logger)
- }
- // After a lock is acquired, the reconcile will be started again because the StatefulSet is being watched
+ err = opErr
}
if err != nil && retryLaterDuration == 0 {
retryLaterDuration = time.Second * 5
diff --git a/controllers/util/solr_scale_util.go b/controllers/util/solr_scale_util.go
index 1f7be5c..a88531d 100644
--- a/controllers/util/solr_scale_util.go
+++ b/controllers/util/solr_scale_util.go
@@ -31,7 +31,7 @@
// a successful status returned from the command. So if we delete the asyncStatus, and then something happens in the operator,
// and we lose our state, then we will need to retry the balanceReplicas command. This should be ok since calling
// balanceReplicas multiple times should not be bad when the replicas for the cluster are already balanced.
-func BalanceReplicasForCluster(ctx context.Context, solrCloud *solr.SolrCloud, statefulSet *appsv1.StatefulSet, balanceReason string, balanceCmdUniqueId string, logger logr.Logger) (balanceComplete bool, err error) {
+func BalanceReplicasForCluster(ctx context.Context, solrCloud *solr.SolrCloud, statefulSet *appsv1.StatefulSet, balanceReason string, balanceCmdUniqueId string, logger logr.Logger) (balanceComplete bool, requestInProgress bool, err error) {
logger = logger.WithValues("balanceReason", balanceReason)
// If the Cloud has 1 or zero pods, there is no reason to balance replicas.
if statefulSet.Spec.Replicas == nil || *statefulSet.Spec.Replicas < 1 {
@@ -65,9 +65,11 @@
} else if apiError != nil {
err = apiError
}
- if err == nil {
+
+ if !balanceComplete && err == nil {
logger.Info("Started balancing replicas across cluster.", "requestId", requestId)
- } else {
+ requestInProgress = true
+ } else if err == nil {
logger.Error(err, "Could not balance replicas across the cluster. Will try again.")
}
}
@@ -79,6 +81,8 @@
logger.Info("Replica Balancing command completed successfully")
} else if asyncState == "failed" {
logger.Info("Replica Balancing command failed. Will try again", "message", message)
+ } else {
+ requestInProgress = true
}
// Delete the async request Id if the async request is successful or failed.
@@ -87,6 +91,7 @@
if _, err = solr_api.DeleteAsyncRequest(ctx, solrCloud, requestId); err != nil {
logger.Error(err, "Could not delete Async request status.", "requestId", requestId)
balanceComplete = false
+ requestInProgress = true
}
}
}
diff --git a/controllers/util/solr_update_util.go b/controllers/util/solr_update_util.go
index e0ca2f0..7d39cd9 100644
--- a/controllers/util/solr_update_util.go
+++ b/controllers/util/solr_update_util.go
@@ -504,7 +504,7 @@
// EvictReplicasForPodIfNecessary takes a solr Pod and migrates all replicas off of that Pod.
// For updates this will only be called for pods using ephemeral data.
// For scale-down operations, this can be called for pods using ephemeral or persistent data.
-func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, evictionReason string, logger logr.Logger) (err error, canDeletePod bool) {
+func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, evictionReason string, logger logr.Logger) (err error, canDeletePod bool, requestInProgress bool) {
logger = logger.WithValues("evictionReason", evictionReason)
// If the Cloud has 1 or zero pods, and this is the "-0" pod, then delete the data since we can't move it anywhere else
// Otherwise, move the replicas to other pods
@@ -537,6 +537,7 @@
}
if err == nil {
logger.Info("Migrating all replicas off of pod before deletion.", "requestId", requestId, "pod", pod.Name)
+ requestInProgress = true
} else {
logger.Error(err, "Could not migrate all replicas off of pod before deletion. Will try again.")
}
@@ -552,6 +553,8 @@
logger.Info("Migration of all replicas off of pod before deletion complete. Pod can now be deleted.", "pod", pod.Name)
} else if asyncState == "failed" {
logger.Info("Migration of all replicas off of pod before deletion failed. Will try again.", "pod", pod.Name, "message", message)
+ } else {
+ requestInProgress = true
}
// Delete the async request Id if the async request is successful or failed.
@@ -560,9 +563,10 @@
if _, err = solr_api.DeleteAsyncRequest(ctx, solrCloud, requestId); err != nil {
logger.Error(err, "Could not delete Async request status.", "requestId", requestId)
canDeletePod = false
+ requestInProgress = true
}
}
}
}
- return err, canDeletePod
+ return err, canDeletePod, requestInProgress
}
diff --git a/controllers/util/solr_util.go b/controllers/util/solr_util.go
index 2d09d0b..88949d7 100644
--- a/controllers/util/solr_util.go
+++ b/controllers/util/solr_util.go
@@ -53,11 +53,8 @@
// Protected StatefulSet annotations
// These are to be saved on a statefulSet update
- ClusterOpsLockAnnotation = "solr.apache.org/clusterOpsLock"
- ScaleDownLock = "scalingDown"
- ScaleUpLock = "scalingUp"
- UpdateLock = "rollingUpdate"
- ClusterOpsMetadataAnnotation = "solr.apache.org/clusterOpsMetadata"
+ ClusterOpsLockAnnotation = "solr.apache.org/clusterOpsLock"
+ ClusterOpsRetryQueueAnnotation = "solr.apache.org/clusterOpsRetryQueue"
SolrIsNotStoppedReadinessCondition = "solr.apache.org/isNotStopped"
SolrReplicasNotEvictedReadinessCondition = "solr.apache.org/replicasNotEvicted"
@@ -624,8 +621,20 @@
// Cluster Operations are saved in the annotations of the SolrCloud StatefulSet.
// ClusterOps information is saved to the statefulSet independently of the general StatefulSet update.
// These annotations can also not be overridden set by the user.
- expected.Annotations[ClusterOpsLockAnnotation] = found.Annotations[ClusterOpsLockAnnotation]
- expected.Annotations[ClusterOpsMetadataAnnotation] = found.Annotations[ClusterOpsMetadataAnnotation]
+ if found.Annotations != nil {
+ if lock, hasLock := found.Annotations[ClusterOpsLockAnnotation]; hasLock {
+ if expected.Annotations == nil {
+ expected.Annotations = make(map[string]string, 1)
+ }
+ expected.Annotations[ClusterOpsLockAnnotation] = lock
+ }
+ if queue, hasQueue := found.Annotations[ClusterOpsRetryQueueAnnotation]; hasQueue {
+ if expected.Annotations == nil {
+ expected.Annotations = make(map[string]string, 1)
+ }
+ expected.Annotations[ClusterOpsRetryQueueAnnotation] = queue
+ }
+ }
// Scaling (i.e. changing) the number of replicas in the SolrCloud statefulSet is handled during the clusterOps
// section of the SolrCloud reconcile loop
diff --git a/docs/solr-cloud/cluster-operations.md b/docs/solr-cloud/cluster-operations.md
new file mode 100644
index 0000000..6b39886
--- /dev/null
+++ b/docs/solr-cloud/cluster-operations.md
@@ -0,0 +1,95 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+# Cluster Operations
+_Since v0.8.0_
+
+Solr Clouds are complex distributed systems, and thus any operations that deal with data availability should be handled with care.
+
+## Cluster Operation Locks
+
+Since cluster operations deal with Solr's index data (either the availability of it, or moving it), its safest to only allow one operation to take place at a time.
+That is why these operations must first obtain a lock on the SolrCloud before execution can be started.
+
+### Lockable Operations
+
+- [Managed Rolling Updates](managed-updates.md)
+- [Scaling Down with Replica Migrations](scaling.md#solr-pod-scale-down)
+- [Scaling Up with Replica Migrations](scaling.md#solr-pod-scale-up)
+
+### How is the Lock Implemented?
+
+The lock is implemented as an annotation on the SolrCloud's `StatefulSet`.
+The cluster operation retry queue is also implemented as an annotation.
+These locks can be viewed at the following annotation keys:
+
+- `solr.apache.org/clusterOpsLock` - The cluster operation that currently holds a lock on the SolrCloud and is executing.
+- `solr.apache.org/clusterOpsRetryQueue` - The queue of cluster operations that timed out and will be retried in order after the `clusterOpsLock` is given up.
+
+
+### Avoiding Deadlocks
+
+If all cluster operations executed without any issues, there would be no need to worry about deadlocks.
+Cluster operations give up the lock when the operation is complete, and then other operations that have been waiting can proceed.
+Unfortunately, these cluster operations can and will fail for a number of reasons:
+
+- Replicas have no other pod to be placed when moving off of a node. (Due to the [Replica Placement Plugin](https://solr.apache.org/guide/solr/latest/configuration-guide/replica-placement-plugins.html) used)
+- There are insufficient resources to create new Solr Pods.
+- The Solr Pod Template has an error and new Solr Pods cannot be started successfully.
+
+If this is the case, then we need to be able to stop the locked cluster operation if it hasn't succeeded in a certain time period.
+The cluster operation can only be stopped if there is no background task (async request) being executed in the Solr Cluster.
+Once cluster operation reaches a point at which it can stop, and the locking-timeout has been exceeded or an error was found, the cluster operation is _paused_, and added to a queue to retry later.
+The _timeout_ is different per-operation:
+- Scaling (Up or Down): **1 minute**
+- Rolling restarts: **10 minutes**
+
+Immediately afterwards, the Solr Operator sees if there are any other operations that need to take place while before the queued cluster operation is re-started.
+This allows for users to make changes to fix the reason why the cluster operation was failing.
+Examples:
+
+- **If there are insufficient resources to create new Solr Pods** \
+ The user can decrease the resource requirements in the Pod Template. \
+ This will create a `Rolling Update` cluster operation that will run once the `Scale Up` is paused. \
+ The `Scale Up` will be dequeued when the `Rolling Update` is complete, and can now complete because there are more available resources in the Kubernetes Cluster.
+
+- **Scale Down is failing because a replica from the scaled-down pod has nowhere to be moved to** \
+ The user can see this error in the logs, and know that the scale down won't work for their use case. \
+ Instead they will have to scale the SolrCloud to the number of pods that the `StatefulSet` is currently running. \
+ Once the `Scale Down` is paused, it will be replaced by a `Scale Up` operation to current number of running pods. \
+ This doesn't actually increase the number of pods, but it will issue a command to Solr to balance replicas across all pods, to make sure the cluster is well-balanced after the failed `ScaleDown`.
+
+If a queued operation is going to be retried, the Solr Operator first makes sure that its values are still valid.
+For the `Scale Down` example above, when the Solr Operator tries to restart the queued `Scale Down` operation, it sees that the `SolrCloud.Spec.Replicas` is no longer lower than the current number of Solr Pods.
+Therefore, the `Scale Down` does not need to be retried, and a "fake" `Scale Up` needs to take place.
+
+### In the case of an emergency
+
+When all else fails, and you need to stop a cluster operation, you can remove the lock annotation from the `StatefulSet` manually.
+
+Edit the StatefulSet (e.g. `kubectl edit statefulset <name>`) and remove the cluster operation lock annotation: `solr.apache.org/clusterOpsLock`
+
+This can be done via the following command:
+
+```bash
+$ kubectl annotate statefulset ${statefulSetName} solr.apache.org/clusterOpsLock-
+```
+
+This will only remove the current running cluster operation, if other cluster operations have been queued, they will be retried once the lock annotation is removed.
+Also if the operation still needs to occur to put the SolrCloud in its expected state, then the operation will be retried once a lock can be acquired.
+The only way to have the cluster operation not run again is to put the SolrCloud back to its previous state (for scaling, set `SolrCloud.Spec.replicas` to the value found in `StatefulSet.Spec.replicas`).
+If the SolrCloud requires a rolling restart, it cannot be "put back to its previous state". The only way to move forward is to either delete the `StatefulSet` (a very dangerous operation), or find a way to allow the `RollingUpdate` operation to succeed.
\ No newline at end of file
diff --git a/docs/solr-cloud/managed-updates.md b/docs/solr-cloud/managed-updates.md
index 8eda96f..b3b8c47 100644
--- a/docs/solr-cloud/managed-updates.md
+++ b/docs/solr-cloud/managed-updates.md
@@ -24,6 +24,8 @@
The operator will find all pods that have not been updated yet and choose the next set of pods to delete for an update, given the following workflow.
+Note: Managed Updates are a executed via [Cluster Operation Locks](cluster-operations.md), please refer to the documentation for more information about how these operations are executed.
+
## Pod Update Workflow
The logic goes as follows:
diff --git a/docs/solr-cloud/scaling.md b/docs/solr-cloud/scaling.md
index 73d6df9..491cbba 100644
--- a/docs/solr-cloud/scaling.md
+++ b/docs/solr-cloud/scaling.md
@@ -44,6 +44,8 @@
For now Replicas are not scaled up and down themselves, they are just moved to utilize new Solr pods or vacate soon-to-be-deleted Solr pods.
+Note: Scaling actions with replica movements are a executed via [Cluster Operation Locks](cluster-operations.md), please refer to the documentation for more information about how these operations are executed.
+
### Solr Pod Scale-Down
When the desired number of Solr Pods that should be run `SolrCloud.Spec.Replicas` is decreased,
diff --git a/helm/solr-operator/Chart.yaml b/helm/solr-operator/Chart.yaml
index 41c10ad..2977fe9 100644
--- a/helm/solr-operator/Chart.yaml
+++ b/helm/solr-operator/Chart.yaml
@@ -91,6 +91,17 @@
url: https://github.com/apache/solr-operator/issues/560
- name: Github PR
url: https://github.com/apache/solr-operator/pull/586
+ - name: Documentation
+ url: https://apache.github.io/solr-operator/docs/solr-cloud/cluster-operations.html
+ - kind: added
+ description: Cluster Operation Locks now give other operations a chance to run, every minute, to eliminate the risk of deadlocks
+ links:
+ - name: Github Issue
+ url: https://github.com/apache/solr-operator/issues/560
+ - name: Github PR
+ url: https://github.com/apache/solr-operator/pull/596
+ - name: Documentation
+ url: https://apache.github.io/solr-operator/docs/solr-cloud/cluster-operations.html#avoiding-deadlocks
artifacthub.io/images: |
- name: solr-operator
image: apache/solr-operator:v0.8.0-prerelease
diff --git a/tests/e2e/resource_utils_test.go b/tests/e2e/resource_utils_test.go
index 3c6c9fb..75bdb83 100644
--- a/tests/e2e/resource_utils_test.go
+++ b/tests/e2e/resource_utils_test.go
@@ -282,6 +282,13 @@
}).Should(MatchError("pods \""+podName+"\" not found"), "Pod exists when it should not")
}
+func expectNoPodNow(ctx context.Context, parentResource client.Object, podName string, additionalOffset ...int) {
+ ExpectWithOffset(
+ resolveOffset(additionalOffset),
+ k8sClient.Get(ctx, resourceKey(parentResource, podName), &corev1.Pod{}),
+ ).To(MatchError("pods \""+podName+"\" not found"), "Pod exists when it should not")
+}
+
func expectService(ctx context.Context, parentResource client.Object, serviceName string, selectorLables map[string]string, isHeadless bool, additionalOffset ...int) *corev1.Service {
return expectServiceWithChecks(ctx, parentResource, serviceName, selectorLables, isHeadless, nil, resolveOffset(additionalOffset))
}
diff --git a/tests/e2e/solrcloud_rolling_upgrade_test.go b/tests/e2e/solrcloud_rolling_upgrade_test.go
index c3fe789..914cb66 100644
--- a/tests/e2e/solrcloud_rolling_upgrade_test.go
+++ b/tests/e2e/solrcloud_rolling_upgrade_test.go
@@ -20,7 +20,7 @@
import (
"context"
solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
- "github.com/apache/solr-operator/controllers/util"
+ "github.com/apache/solr-operator/controllers"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"k8s.io/apimachinery/pkg/util/intstr"
@@ -89,7 +89,10 @@
}
})
statefulSet := expectStatefulSet(ctx, solrCloud, solrCloud.StatefulSetName())
- Expect(statefulSet.Annotations).To(HaveKeyWithValue(util.ClusterOpsLockAnnotation, util.UpdateLock), "StatefulSet does not have a RollingUpdate lock after starting managed update.")
+ clusterOp, err := controllers.GetCurrentClusterOp(statefulSet)
+ Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a RollingUpdate lock.")
+ Expect(clusterOp.Operation).To(Equal(controllers.UpdateLock), "StatefulSet does not have a RollingUpdate lock after starting managed update.")
By("waiting for the rolling restart to complete")
// Expect the SolrCloud to be up-to-date, or in a valid restarting state
@@ -144,7 +147,9 @@
}
statefulSet = expectStatefulSet(ctx, solrCloud, solrCloud.StatefulSetName())
- Expect(statefulSet.Annotations).To(Not(HaveKey(util.ClusterOpsLockAnnotation)), "StatefulSet should not have a RollingUpdate lock after finishing a managed update.")
+ clusterOp, err = controllers.GetCurrentClusterOp(statefulSet)
+ Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ Expect(clusterOp).To(BeNil(), "StatefulSet should not have a RollingUpdate lock after finishing a managed update.")
By("checking that the collections can be queried after the restart")
queryCollection(ctx, solrCloud, solrCollection1, 0)
diff --git a/tests/e2e/solrcloud_scaling_test.go b/tests/e2e/solrcloud_scaling_test.go
index b906176..d3a0868 100644
--- a/tests/e2e/solrcloud_scaling_test.go
+++ b/tests/e2e/solrcloud_scaling_test.go
@@ -20,7 +20,7 @@
import (
"context"
solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
- "github.com/apache/solr-operator/controllers/util"
+ "github.com/apache/solr-operator/controllers"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
@@ -67,36 +67,66 @@
By("waiting for the scaleDown of first pod to begin")
expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(3)), "StatefulSet should still have 3 pods, because the scale down should first move Solr replicas")
- g.Expect(found.Annotations).To(HaveKeyWithValue(util.ClusterOpsLockAnnotation, util.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
- g.Expect(found.Annotations).To(HaveKeyWithValue(util.ClusterOpsMetadataAnnotation, "2"), "StatefulSet scaling lock operation has the wrong metadata.")
+ clusterOp, err := controllers.GetCurrentClusterOp(found)
+ g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleDown lock.")
+ g.Expect(clusterOp.Operation).To(Equal(controllers.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
+ g.Expect(clusterOp.Metadata).To(Equal("2"), "StatefulSet scaling lock operation has the wrong metadata.")
})
queryCollection(ctx, solrCloud, solrCollection2, 0)
By("waiting for the scaleDown of the first pod to finish")
expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(2)), "StatefulSet should now have 2 pods, after the replicas have been moved off the first pod.")
+ clusterOp, err := controllers.GetCurrentClusterOp(found)
+ g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleDown lock.")
+ g.Expect(clusterOp.Operation).To(Equal(controllers.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
+ g.Expect(clusterOp.Metadata).To(Equal("2"), "StatefulSet scaling lock operation has the wrong metadata.")
})
queryCollection(ctx, solrCloud, solrCollection2, 0)
- By("waiting for the scaleDown of second pod to begin")
+ // Wait till the pod has actually been deleted
expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
- g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(2)), "StatefulSet should still have 2 pods, because the scale down should first move Solr replicas")
- g.Expect(found.Annotations).To(HaveKeyWithValue(util.ClusterOpsLockAnnotation, util.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
- g.Expect(found.Annotations).To(HaveKeyWithValue(util.ClusterOpsMetadataAnnotation, "1"), "StatefulSet scaling lock operation has the wrong metadata.")
+ g.Expect(found.Status.Replicas).To(HaveValue(BeEquivalentTo(2)), "StatefulSet should now have 2 pods, after the replicas have been moved off the first pod.")
})
- queryCollection(ctx, solrCloud, solrCollection1, 0)
+
+ By("waiting for the scaleDown of second pod to begin")
+ statefulSet := expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
+ clusterOp, err := controllers.GetCurrentClusterOp(found)
+ g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleDown lock.")
+ g.Expect(clusterOp.Operation).To(Equal(controllers.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
+ g.Expect(clusterOp.Metadata).To(Equal("1"), "StatefulSet scaling lock operation has the wrong metadata.")
+ })
+ // When the next scale down happens, the 3rd solr pod (ordinal 2) should be gone, and the statefulSet replicas should be 2 across the board.
+ // The first scale down should not be complete until this is done.
+ Expect(statefulSet.Spec.Replicas).To(HaveValue(BeEquivalentTo(2)), "StatefulSet should still have 2 pods configured, because the scale down should first move Solr replicas")
+ Expect(statefulSet.Status.Replicas).To(HaveValue(BeEquivalentTo(2)), "StatefulSet should only have 2 pods running, because previous pod scale down should have completely finished")
// This pod check must happen after the above clusterLock and replicas check.
// The StatefulSet controller might take a good amount of time to actually delete the pod,
// and the replica migration/cluster op might already be done by the time the first pod is deleted.
- expectNoPod(ctx, solrCloud, solrCloud.GetSolrPodName(2))
+ expectNoPodNow(ctx, solrCloud, solrCloud.GetSolrPodName(2))
+ queryCollection(ctx, solrCloud, solrCollection1, 0)
By("waiting for the scaleDown to finish")
- statefulSet := expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
+ statefulSet = expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(1)), "StatefulSet should now have 1 pods, after the replicas have been moved.")
})
+ // Once the scale down actually occurs, the clusterOp is not complete. We need to wait till the last pod is deleted
+ clusterOp, err := controllers.GetCurrentClusterOp(statefulSet)
+ Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleDown lock.")
+ Expect(clusterOp.Operation).To(Equal(controllers.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
+ Expect(clusterOp.Metadata).To(Equal("1"), "StatefulSet scaling lock operation has the wrong metadata.")
+
+ // Wait for the last pod to be deleted
+ statefulSet = expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
+ g.Expect(found.Status.Replicas).To(HaveValue(BeEquivalentTo(1)), "StatefulSet should now have 1 pods, after the replicas have been moved.")
+ })
// Once the scale down actually occurs, the statefulSet annotations should already be removed
- Expect(statefulSet.Annotations).To(Not(HaveKey(util.ClusterOpsLockAnnotation)), "StatefulSet should not have a scaling lock after scaling is complete.")
- Expect(statefulSet.Annotations).To(Not(HaveKey(util.ClusterOpsMetadataAnnotation)), "StatefulSet should not have scaling lock metadata after scaling is complete.")
+ clusterOp, err = controllers.GetCurrentClusterOp(statefulSet)
+ Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ Expect(clusterOp).To(BeNil(), "StatefulSet should not have a ScaleDown lock after scaling is complete.")
expectNoPod(ctx, solrCloud, solrCloud.GetSolrPodName(1))
queryCollection(ctx, solrCloud, solrCollection1, 0)
@@ -117,9 +147,10 @@
Expect(k8sClient.Patch(ctx, solrCloud, client.MergeFrom(originalSolrCloud))).To(Succeed(), "Could not patch SolrCloud replicas to initiate scale down")
By("make sure scaleDown happens without a clusterLock and eventually the replicas are removed")
- statefulSet := expectStatefulSetWithConsistentChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, statefulSet *appsv1.StatefulSet) {
- g.Expect(statefulSet.Annotations).To(Not(HaveKey(util.ClusterOpsLockAnnotation)), "StatefulSet should not have a scaling lock while scaling unmanaged.")
- g.Expect(statefulSet.Annotations).To(Not(HaveKey(util.ClusterOpsMetadataAnnotation)), "StatefulSet should not have scaling lock metadata while scaling unmanaged.")
+ statefulSet := expectStatefulSetWithConsistentChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
+ clusterOp, err := controllers.GetCurrentClusterOp(found)
+ g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ g.Expect(clusterOp).To(BeNil(), "StatefulSet should not have a scaling lock, since scaleDown is unmanaged.")
})
Expect(statefulSet.Spec.Replicas).To(HaveValue(BeEquivalentTo(1)), "StatefulSet should immediately have 1 pod, since the scaleDown is unmanaged.")
@@ -166,19 +197,31 @@
By("triggering a scale down via solrCloud replicas")
Expect(k8sClient.Patch(ctx, solrCloud, client.MergeFrom(originalSolrCloud))).To(Succeed(), "Could not patch SolrCloud replicas to initiate scale up")
- By("waiting for the scaleDown of first pod to begin")
+ By("waiting for the scaleUp to begin")
statefulSet := expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
+ clusterOp, err := controllers.GetCurrentClusterOp(found)
+ g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleUp lock.")
+ g.Expect(clusterOp.Operation).To(Equal(controllers.ScaleUpLock), "StatefulSet does not have a scaleUp lock.")
+ g.Expect(clusterOp.Metadata).To(Equal("3"), "StatefulSet scaling lock operation has the wrong metadata.")
+ })
+
+ // The first step is to increase the number of pods
+ statefulSet = expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(3)), "StatefulSet should still have 3 pods, because the scale down should first move Solr replicas")
})
- Expect(statefulSet.Annotations).To(HaveKeyWithValue(util.ClusterOpsLockAnnotation, util.ScaleUpLock), "StatefulSet does not have a scaleUp lock after starting managed scaleUp.")
- Expect(statefulSet.Annotations).To(HaveKeyWithValue(util.ClusterOpsMetadataAnnotation, "1"), "StatefulSet scaleUp lock operation has the wrong metadata.")
+ clusterOp, err := controllers.GetCurrentClusterOp(statefulSet)
+ Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleUp lock.")
+ Expect(clusterOp.Operation).To(Equal(controllers.ScaleUpLock), "StatefulSet does not have a scaleUp lock.")
+ Expect(clusterOp.Metadata).To(Equal("3"), "StatefulSet scaling lock operation has the wrong metadata.")
By("waiting for the scaleUp to finish")
statefulSet = expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
- g.Expect(found.Annotations).To(Not(HaveKey(util.ClusterOpsLockAnnotation)), "StatefulSet should not have a scaling lock after scaling is complete.")
+ clusterOp, err := controllers.GetCurrentClusterOp(found)
+ g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ g.Expect(clusterOp).To(BeNil(), "StatefulSet should not have a scaling lock after scaling is complete.")
})
- // Once the scale down actually occurs, the statefulSet annotations should already be removed
- Expect(statefulSet.Annotations).To(Not(HaveKey(util.ClusterOpsMetadataAnnotation)), "StatefulSet should not have scaling lock metadata after scaling is complete.")
queryCollection(ctx, solrCloud, solrCollection1, 0)
queryCollection(ctx, solrCloud, solrCollection2, 0)
@@ -201,8 +244,9 @@
statefulSet := expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(3)), "StatefulSet should immediately have 3 pods.")
})
- Expect(statefulSet.Annotations).To(Not(HaveKey(util.ClusterOpsLockAnnotation)), "StatefulSet should not have a scaling lock, since scaleUp is unmanaged.")
- Expect(statefulSet.Annotations).To(Not(HaveKey(util.ClusterOpsMetadataAnnotation)), "StatefulSet should not have a scaling lock metadata, since scaleUp is unmanaged.")
+ clusterOp, err := controllers.GetCurrentClusterOp(statefulSet)
+ Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ Expect(clusterOp).To(BeNil(), "StatefulSet should not have a scaling lock, since scaleUp is unmanaged.")
By("Waiting for the new solrCloud pods to become ready")
solrCloud = expectSolrCloudToBeReady(ctx, solrCloud)
@@ -212,3 +256,75 @@
})
})
})
+
+var _ = FDescribe("E2E - SolrCloud - Scale Down Abandon", func() {
+ var (
+ solrCloud *solrv1beta1.SolrCloud
+
+ solrCollection1 = "e2e-1"
+ )
+
+ BeforeEach(func() {
+ solrCloud = generateBaseSolrCloudWithPlacementPolicy(2, "minimizecores")
+ })
+
+ JustBeforeEach(func(ctx context.Context) {
+ By("creating the SolrCloud")
+ Expect(k8sClient.Create(ctx, solrCloud)).To(Succeed())
+ DeferCleanup(func(ctx context.Context) {
+ cleanupTest(ctx, solrCloud)
+ })
+
+ By("Waiting for the SolrCloud to come up healthy")
+ solrCloud = expectSolrCloudToBeReady(ctx, solrCloud)
+
+ By("creating a first Solr Collection")
+ createAndQueryCollection(ctx, solrCloud, solrCollection1, 1, 2)
+ })
+
+ FContext("with replica migration", func() {
+
+ FIt("Abandons the ScaleDown", func(ctx context.Context) {
+ originalSolrCloud := solrCloud.DeepCopy()
+ solrCloud.Spec.Replicas = pointer.Int32(int32(1))
+ By("triggering a scale down via solrCloud replicas")
+ Expect(k8sClient.Patch(ctx, solrCloud, client.MergeFrom(originalSolrCloud))).To(Succeed(), "Could not patch SolrCloud replicas to initiate scale up")
+
+ By("waiting for the scaleDown to begin")
+ expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
+ g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(2)), "StatefulSet should still have 2 pods, because the scale down should first move Solr replicas")
+ clusterOp, err := controllers.GetCurrentClusterOp(found)
+ g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleDown lock.")
+ g.Expect(clusterOp.Operation).To(Equal(controllers.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
+ g.Expect(clusterOp.Metadata).To(Equal("1"), "StatefulSet scaling lock operation has the wrong metadata.")
+ })
+
+ By("Undo the scale down because the replicas cannot fit")
+ originalSolrCloud = solrCloud.DeepCopy()
+ solrCloud.Spec.Replicas = pointer.Int32(int32(2))
+ Expect(k8sClient.Patch(ctx, solrCloud, client.MergeFrom(originalSolrCloud))).To(Succeed(), "Could not patch SolrCloud replicas to cancel scale down")
+
+ By("Make sure that the operation is changed to a fake 'scaleUp' to redistribute replicas")
+ expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
+ clusterOp, err := controllers.GetCurrentClusterOp(found)
+ g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleUp lock.")
+ g.Expect(clusterOp.Operation).To(Equal(controllers.ScaleUpLock), "StatefulSet does not have a scaleUp lock.")
+ g.Expect(clusterOp.Metadata).To(Equal("2"), "StatefulSet scaling lock operation has the wrong metadata.")
+ })
+
+ By("waiting for the fake scaleUp to finish")
+ statefulSet := expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
+ clusterOp, err := controllers.GetCurrentClusterOp(found)
+ g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
+ g.Expect(clusterOp).To(BeNil(), "StatefulSet should not have a scaling lock after scaling is complete.")
+ })
+
+ Expect(statefulSet.Spec.Replicas).To(HaveValue(BeEquivalentTo(2)), "After everything, the statefulset should be configured to have 2 pods")
+ Expect(statefulSet.Status.Replicas).To(HaveValue(BeEquivalentTo(2)), "After everything, the statefulset should have 2 pods running")
+
+ queryCollection(ctx, solrCloud, solrCollection1, 0)
+ })
+ })
+})
diff --git a/tests/e2e/test_utils_test.go b/tests/e2e/test_utils_test.go
index 2568ed8..2330e99 100644
--- a/tests/e2e/test_utils_test.go
+++ b/tests/e2e/test_utils_test.go
@@ -519,3 +519,15 @@
},
}
}
+
+func generateBaseSolrCloudWithPlacementPolicy(replicas int, placementPlugin string) *solrv1beta1.SolrCloud {
+ solrCloud := generateBaseSolrCloud(replicas)
+ solrCloud.Spec.CustomSolrKubeOptions.PodOptions.EnvVariables = []corev1.EnvVar{
+ {
+ Name: "SOLR_PLACEMENTPLUGIN_DEFAULT",
+ Value: placementPlugin,
+ },
+ }
+
+ return solrCloud
+}