// blob: 6143342aff4d3c7dac85abbc9b4dba97b4538246 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package e2e
import (
"context"
solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
"github.com/apache/solr-operator/controllers"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"sigs.k8s.io/controller-runtime/pkg/client"
"time"
)
var _ = FDescribe("E2E - SolrCloud - Rolling Upgrades", func() {
var (
solrCloud *solrv1beta1.SolrCloud
solrCollection1 = "e2e-1"
solrCollection2 = "e2e-2"
)
BeforeEach(func() {
solrCloud = generateBaseSolrCloud(3)
})
JustBeforeEach(func(ctx context.Context) {
By("creating the SolrCloud")
Expect(k8sClient.Create(ctx, solrCloud)).To(Succeed())
DeferCleanup(func(ctx context.Context) {
cleanupTest(ctx, solrCloud)
})
By("Waiting for the SolrCloud to come up healthy")
solrCloud = expectSolrCloudToBeReady(ctx, solrCloud)
By("creating a first Solr Collection")
createAndQueryCollection(ctx, solrCloud, solrCollection1, 1, 2)
By("creating a second Solr Collection")
createAndQueryCollection(ctx, solrCloud, solrCollection2, 2, 1)
})
FContext("Managed Update - Ephemeral Data - Slow", func() {
BeforeEach(func() {
one := intstr.FromInt(1)
hundredPerc := intstr.FromString("100%")
solrCloud.Spec.UpdateStrategy = solrv1beta1.SolrUpdateStrategy{
Method: solrv1beta1.ManagedUpdate,
ManagedUpdateOptions: solrv1beta1.ManagedUpdateOptions{
MaxPodsUnavailable: &one,
MaxShardReplicasUnavailable: &hundredPerc,
},
}
})
FIt("Fully Restarts", func(ctx context.Context) {
patchedSolrCloud := solrCloud.DeepCopy()
patchedSolrCloud.Spec.CustomSolrKubeOptions.PodOptions = &solrv1beta1.PodOptions{
Annotations: map[string]string{
"test": "restart-1",
},
}
By("triggering a rolling restart via pod annotations")
Expect(k8sClient.Patch(ctx, patchedSolrCloud, client.MergeFrom(solrCloud))).To(Succeed(), "Could not add annotation to SolrCloud pod to initiate rolling restart")
By("waiting for the rolling restart to begin")
solrCloud = expectSolrCloudWithChecks(ctx, solrCloud, func(g Gomega, cloud *solrv1beta1.SolrCloud) {
g.Expect(cloud.Status.UpToDateNodes).To(BeZero(), "Cloud did not get to a state with zero up-to-date replicas when rolling restart began.")
for _, nodeStatus := range cloud.Status.SolrNodes {
g.Expect(nodeStatus.SpecUpToDate).To(BeFalse(), "Node not starting as out-of-date when rolling restart begins: %s", nodeStatus.Name)
}
})
statefulSet := expectStatefulSet(ctx, solrCloud, solrCloud.StatefulSetName())
clusterOp, err := controllers.GetCurrentClusterOp(statefulSet)
Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a RollingUpdate lock.")
Expect(clusterOp.Operation).To(Equal(controllers.UpdateLock), "StatefulSet does not have a RollingUpdate lock after starting managed update.")
By("waiting for the rolling restart to complete")
// Expect the SolrCloud to be up-to-date, or in a valid restarting state
lastCheckNodeStatuses := make(map[string]solrv1beta1.SolrNodeStatus, *solrCloud.Spec.Replicas)
lastCheckReplicas := *solrCloud.Spec.Replicas
expectSolrCloudWithChecks(ctx, solrCloud, func(g Gomega, cloud *solrv1beta1.SolrCloud) {
// If there are more than 1 pods not ready, then fail because we have set MaxPodsUnavailable to 1
if cloud.Status.ReadyReplicas < *solrCloud.Spec.Replicas-int32(1) {
StopTrying("More than 1 pod (replica) is not ready, which is not allowed by the managed upgrade options").
Attach("Replicas", *solrCloud.Spec.Replicas).
Attach("ReadyReplicas", cloud.Status.ReadyReplicas).
Attach("SolrCloud Status", cloud.Status).
Now()
}
// Make sure that if a pod is deleted/recreated, it was first taken offline and "scheduledForDeletion" was set to true
// TODO: Try to find a better way to make sure that the deletion readinessCondition works
if cloud.Status.Replicas < lastCheckReplicas {
// We only want to check the statuses of nodes that the pods have been deleted, or they have been re-created since our last check
for _, nodeStatus := range cloud.Status.SolrNodes {
if !nodeStatus.SpecUpToDate || lastCheckNodeStatuses[nodeStatus.Name].SpecUpToDate {
delete(lastCheckNodeStatuses, nodeStatus.Name)
}
}
for _, nodeStatus := range cloud.Status.SolrNodes {
oldNodeStatus := lastCheckNodeStatuses[nodeStatus.Name]
g.Expect(oldNodeStatus.ScheduledForDeletion).To(BeTrue(), "Before SolrNode %s is taken down, scheduledForDeletion should be true", nodeStatus.Name)
g.Expect(oldNodeStatus.Ready).To(BeFalse(), "Before SolrNode %s is taken down, it should not be ready", nodeStatus.Name)
}
}
// Update the nodeStatuses for the next iteration's readinessCondition check
lastCheckReplicas = cloud.Status.Replicas
for _, nodeStatus := range cloud.Status.SolrNodes {
lastCheckNodeStatuses[nodeStatus.Name] = nodeStatus
if nodeStatus.Ready || nodeStatus.SpecUpToDate {
g.Expect(nodeStatus.ScheduledForDeletion).To(BeFalse(), "SolrNode %s cannot be scheduledForDeletion while being 'ready' or 'upToDate'", nodeStatus.Name)
} else {
g.Expect(nodeStatus.ScheduledForDeletion).To(BeTrue(), "SolrNode %s must be scheduledForDeletion while not being 'ready' or 'upToDate', so it was taken down for the update", nodeStatus.Name)
}
}
// As long as the current restart is in a healthy place, keep checking if the restart is finished
g.Expect(cloud.Status.UpToDateNodes).To(Equal(*cloud.Spec.Replicas), "The SolrCloud did not finish the rolling restart, not all nodes are up-to-date")
})
By("When the rolling update is done, a balanceReplicas operation should be started")
// Wait for new pods to come up, and when they do we should be doing a balanceReplicas clusterOp
statefulSet = expectStatefulSetWithChecksAndTimeout(ctx, solrCloud, solrCloud.StatefulSetName(), time.Second*45, time.Millisecond, func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Status.ReadyReplicas).To(BeEquivalentTo(*found.Spec.Replicas), "The SolrCloud did not finish the rolling restart, all nodes are up-to-date, but not all are ready")
clusterOp, err = controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a balanceReplicas lock after rolling update is complete.")
g.Expect(clusterOp.Operation).To(Equal(controllers.BalanceReplicasLock), "StatefulSet does not have a balanceReplicas lock after rolling update is complete.")
g.Expect(clusterOp.Metadata).To(Equal("RollingUpdateComplete"), "StatefulSet balanceReplicas lock operation has the wrong metadata.")
})
// After all pods are ready, make sure that the SolrCloud status is correct
solrCloud = expectSolrCloud(ctx, solrCloud)
Expect(solrCloud.Status.ReadyReplicas).To(Equal(solrCloud.Status.UpToDateNodes), "The SolrCloud did not finish the rolling restart, all nodes are up-to-date, but not all are ready")
// Make sure that the status object is correct for the nodes
for _, nodeStatus := range solrCloud.Status.SolrNodes {
Expect(nodeStatus.SpecUpToDate).To(BeTrue(), "Node not finishing as up-to-date when rolling restart ends: %s", nodeStatus.Name)
Expect(nodeStatus.Ready).To(BeTrue(), "Node not finishing as ready when rolling restart ends: %s", nodeStatus.Name)
}
By("waiting for the balanceReplicas to finish")
expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
clusterOp, err := controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).To(BeNil(), "StatefulSet should not have a balanceReplicas lock after balancing is complete.")
})
By("checking that the collections can be queried after the restart")
queryCollection(ctx, solrCloud, solrCollection1, 0)
queryCollection(ctx, solrCloud, solrCollection2, 0)
})
})
})