blob: 81ab608ed4cce5c1c35811de28e85b2e66ab50c0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package e2e
import (
"context"
solrv1beta1 "github.com/apache/solr-operator/api/v1beta1"
"github.com/apache/solr-operator/controllers"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
"strings"
"time"
)
var _ = FDescribe("E2E - SolrCloud - Scale Down", func() {
var (
solrCloud *solrv1beta1.SolrCloud
solrCollection1 = "e2e-1"
solrCollection2 = "e2e-2"
)
BeforeEach(func() {
solrCloud = generateBaseSolrCloud(3)
})
JustBeforeEach(func(ctx context.Context) {
By("creating the SolrCloud")
Expect(k8sClient.Create(ctx, solrCloud)).To(Succeed())
DeferCleanup(func(ctx context.Context) {
cleanupTest(ctx, solrCloud)
})
By("Waiting for the SolrCloud to come up healthy")
solrCloud = expectSolrCloudToBeReady(ctx, solrCloud)
By("creating a first Solr Collection")
createAndQueryCollection(ctx, solrCloud, solrCollection1, 1, 1, 1)
By("creating a second Solr Collection")
createAndQueryCollection(ctx, solrCloud, solrCollection2, 1, 1, 2)
})
FContext("with replica migration", func() {
FIt("Scales Down", func(ctx context.Context) {
originalSolrCloud := solrCloud.DeepCopy()
solrCloud.Spec.Replicas = pointer.Int32(1)
By("triggering a scale down via solrCloud replicas")
Expect(k8sClient.Patch(ctx, solrCloud, client.MergeFrom(originalSolrCloud))).To(Succeed(), "Could not patch SolrCloud replicas to initiate scale down")
By("waiting for the scaleDown of first pod to begin")
expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(3)), "StatefulSet should still have 3 pods, because the scale down should first move Solr replicas")
clusterOp, err := controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleDown lock.")
g.Expect(clusterOp.Operation).To(Equal(controllers.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
g.Expect(clusterOp.Metadata).To(Equal("2"), "StatefulSet scaling lock operation has the wrong metadata.")
})
queryCollection(ctx, solrCloud, solrCollection2, 0)
By("waiting for the scaleDown of the first pod to finish")
expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(2)), "StatefulSet should now have 2 pods, after the replicas have been moved off the first pod.")
clusterOp, err := controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleDown lock.")
g.Expect(clusterOp.Operation).To(Equal(controllers.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
g.Expect(clusterOp.Metadata).To(Equal("2"), "StatefulSet scaling lock operation has the wrong metadata.")
})
queryCollection(ctx, solrCloud, solrCollection2, 0)
// Wait till the pod has actually been deleted
expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Status.Replicas).To(HaveValue(BeEquivalentTo(2)), "StatefulSet should now have 2 pods, after the replicas have been moved off the first pod.")
})
By("waiting for the scaleDown of second pod to begin")
statefulSet := expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
clusterOp, err := controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleDown lock.")
g.Expect(clusterOp.Operation).To(Equal(controllers.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
g.Expect(clusterOp.Metadata).To(Equal("1"), "StatefulSet scaling lock operation has the wrong metadata.")
})
// When the next scale down happens, the 3rd solr pod (ordinal 2) should be gone, and the statefulSet replicas should be 2 across the board.
// The first scale down should not be complete until this is done.
Expect(statefulSet.Spec.Replicas).To(HaveValue(BeEquivalentTo(2)), "StatefulSet should still have 2 pods configured, because the scale down should first move Solr replicas")
Expect(statefulSet.Status.Replicas).To(HaveValue(BeEquivalentTo(2)), "StatefulSet should only have 2 pods running, because previous pod scale down should have completely finished")
// This pod check must happen after the above clusterLock and replicas check.
// The StatefulSet controller might take a good amount of time to actually delete the pod,
// and the replica migration/cluster op might already be done by the time the first pod is deleted.
expectNoPodNow(ctx, solrCloud, solrCloud.GetSolrPodName(2))
queryCollection(ctx, solrCloud, solrCollection1, 0)
By("waiting for the scaleDown to finish")
statefulSet = expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(1)), "StatefulSet should now have 1 pods, after the replicas have been moved.")
})
// Once the scale down actually occurs, the clusterOp is not complete. We need to wait till the last pod is deleted
clusterOp, err := controllers.GetCurrentClusterOp(statefulSet)
Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleDown lock.")
Expect(clusterOp.Operation).To(Equal(controllers.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
Expect(clusterOp.Metadata).To(Equal("1"), "StatefulSet scaling lock operation has the wrong metadata.")
// Wait for the last pod to be deleted
expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Status.Replicas).To(HaveValue(BeEquivalentTo(1)), "StatefulSet should now have 1 pods, after the replicas have been moved.")
})
// Once the scale down actually occurs, the statefulSet annotations should be removed very soon
expectStatefulSetWithChecksAndTimeout(ctx, solrCloud, solrCloud.StatefulSetName(), time.Second*2, time.Millisecond*500, func(g Gomega, found *appsv1.StatefulSet) {
clusterOp, err = controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).To(BeNil(), "StatefulSet should not have a ScaleDown lock after scaling is complete.")
})
expectNoPod(ctx, solrCloud, solrCloud.GetSolrPodName(1))
queryCollection(ctx, solrCloud, solrCollection1, 0)
queryCollection(ctx, solrCloud, solrCollection2, 0)
})
})
FContext("without replica migration", func() {
BeforeEach(func() {
solrCloud.Spec.Scaling.VacatePodsOnScaleDown = pointer.Bool(false)
})
FIt("Scales Down", func(ctx context.Context) {
originalSolrCloud := solrCloud.DeepCopy()
solrCloud.Spec.Replicas = pointer.Int32(int32(1))
By("triggering a scale down via solrCloud replicas")
Expect(k8sClient.Patch(ctx, solrCloud, client.MergeFrom(originalSolrCloud))).To(Succeed(), "Could not patch SolrCloud replicas to initiate scale down")
By("make sure scaleDown happens without a clusterLock and eventually the replicas are removed")
statefulSet := expectStatefulSetWithConsistentChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
clusterOp, err := controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).To(BeNil(), "StatefulSet should not have a scaling lock, since scaleDown is unmanaged.")
})
Expect(statefulSet.Spec.Replicas).To(HaveValue(BeEquivalentTo(1)), "StatefulSet should immediately have 1 pod, since the scaleDown is unmanaged.")
expectNoPod(ctx, solrCloud, solrCloud.GetSolrPodName(1))
queryCollectionWithNoReplicaAvailable(ctx, solrCloud, solrCollection1)
})
})
})
var _ = FDescribe("E2E - SolrCloud - Scale Up", func() {
var (
solrCloud *solrv1beta1.SolrCloud
solrCollection1 = "e2e-1"
solrCollection2 = "e2e-2"
)
BeforeEach(func() {
solrCloud = generateBaseSolrCloud(1)
})
JustBeforeEach(func(ctx context.Context) {
By("creating the SolrCloud")
Expect(k8sClient.Create(ctx, solrCloud)).To(Succeed())
DeferCleanup(func(ctx context.Context) {
cleanupTest(ctx, solrCloud)
})
By("Waiting for the SolrCloud to come up healthy")
solrCloud = expectSolrCloudToBeReady(ctx, solrCloud)
By("creating a first Solr Collection")
createAndQueryCollection(ctx, solrCloud, solrCollection1, 1, 1)
By("creating a second Solr Collection")
createAndQueryCollection(ctx, solrCloud, solrCollection2, 2, 1)
})
FContext("with replica migration", func() {
FIt("Scales Up", func(ctx context.Context) {
originalSolrCloud := solrCloud.DeepCopy()
solrCloud.Spec.Replicas = pointer.Int32(int32(3))
By("triggering a scale down via solrCloud replicas")
Expect(k8sClient.Patch(ctx, solrCloud, client.MergeFrom(originalSolrCloud))).To(Succeed(), "Could not patch SolrCloud replicas to initiate scale up")
By("waiting for the scaleUp to begin")
statefulSet := expectStatefulSetWithChecksAndTimeout(ctx, solrCloud, solrCloud.StatefulSetName(), time.Second*5, time.Millisecond*5, func(g Gomega, found *appsv1.StatefulSet) {
clusterOp, err := controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleUp lock.")
g.Expect(clusterOp.Operation).To(Equal(controllers.ScaleUpLock), "StatefulSet does not have a scaleUp lock.")
g.Expect(clusterOp.Metadata).To(Equal("3"), "StatefulSet scaling lock operation has the wrong metadata.")
})
// The first step is to increase the number of pods
// Check very often, as the new pods will be created quickly, which will cause the cluster op to change.
statefulSet = expectStatefulSetWithChecksAndTimeout(ctx, solrCloud, solrCloud.StatefulSetName(), time.Second*5, time.Millisecond*5, func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(3)), "StatefulSet should still have 3 pods, because the scale down should first move Solr replicas")
})
clusterOp, err := controllers.GetCurrentClusterOp(statefulSet)
Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleUp lock.")
Expect(clusterOp.Operation).To(Equal(controllers.ScaleUpLock), "StatefulSet does not have a scaleUp lock.")
Expect(clusterOp.Metadata).To(Equal("3"), "StatefulSet scaling lock operation has the wrong metadata.")
// Wait for new pods to come up, and when they do we should be doing a balanceReplicas clusterOp
statefulSet = expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Status.Replicas).To(HaveValue(BeEquivalentTo(3)), "StatefulSet should still have 3 pods, because the scale down should first move Solr replicas")
})
clusterOp, err = controllers.GetCurrentClusterOp(statefulSet)
Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a balanceReplicas lock after new pods are created.")
Expect(clusterOp.Operation).To(Equal(controllers.BalanceReplicasLock), "StatefulSet does not have a balanceReplicas lock after new pods are created.")
Expect(clusterOp.Metadata).To(Equal("ScaleUp"), "StatefulSet balanceReplicas lock operation has the wrong metadata.")
By("waiting for the scaleUp to finish")
statefulSet = expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
clusterOp, err := controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).To(BeNil(), "StatefulSet should not have a balanceReplicas lock after balancing is complete.")
})
queryCollection(ctx, solrCloud, solrCollection1, 0)
queryCollection(ctx, solrCloud, solrCollection2, 0)
})
})
FContext("without replica migration", func() {
BeforeEach(func() {
solrCloud.Spec.Scaling.PopulatePodsOnScaleUp = pointer.Bool(false)
})
FIt("Scales Up", func(ctx context.Context) {
originalSolrCloud := solrCloud.DeepCopy()
solrCloud.Spec.Replicas = pointer.Int32(int32(3))
By("triggering a scale down via solrCloud replicas")
Expect(k8sClient.Patch(ctx, solrCloud, client.MergeFrom(originalSolrCloud))).To(Succeed(), "Could not patch SolrCloud replicas to initiate scale down")
By("make sure scaleDown happens without a clusterLock and eventually the replicas are removed")
statefulSet := expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(3)), "StatefulSet should immediately have 3 pods.")
})
clusterOp, err := controllers.GetCurrentClusterOp(statefulSet)
Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
Expect(clusterOp).To(BeNil(), "StatefulSet should not have a scaling lock, since scaleUp is unmanaged.")
By("Waiting for the new solrCloud pods to become ready")
solrCloud = expectSolrCloudToBeReady(ctx, solrCloud)
queryCollection(ctx, solrCloud, solrCollection1, 0)
queryCollection(ctx, solrCloud, solrCollection2, 0)
})
})
})
var _ = FDescribe("E2E - SolrCloud - Scale Down Abandon", func() {
var (
solrCloud *solrv1beta1.SolrCloud
solrCollection1 = "e2e-1"
)
BeforeEach(func() {
solrCloud = generateBaseSolrCloudWithPlacementPolicy(2, "minimizecores")
if strings.Contains(solrImage, ":8") || strings.Contains(solrImage, "8.") {
Skip("Cannot run the Scale Down Abandon test with Solr 8, as a working placementPolicy for the test cannot be defaulted")
}
})
JustBeforeEach(func(ctx context.Context) {
By("creating the SolrCloud")
Expect(k8sClient.Create(ctx, solrCloud)).To(Succeed())
DeferCleanup(func(ctx context.Context) {
cleanupTest(ctx, solrCloud)
})
By("Waiting for the SolrCloud to come up healthy")
solrCloud = expectSolrCloudToBeReady(ctx, solrCloud)
By("creating a first Solr Collection")
createAndQueryCollection(ctx, solrCloud, solrCollection1, 1, 2)
})
FContext("with replica migration", func() {
FIt("Abandons the ScaleDown", func(ctx context.Context) {
originalSolrCloud := solrCloud.DeepCopy()
solrCloud.Spec.Replicas = pointer.Int32(int32(1))
By("triggering a scale down via solrCloud replicas")
Expect(k8sClient.Patch(ctx, solrCloud, client.MergeFrom(originalSolrCloud))).To(Succeed(), "Could not patch SolrCloud replicas to initiate scale up")
By("waiting for the scaleDown to begin")
expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
g.Expect(found.Spec.Replicas).To(HaveValue(BeEquivalentTo(2)), "StatefulSet should still have 2 pods, because the scale down should first move Solr replicas")
clusterOp, err := controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleDown lock.")
g.Expect(clusterOp.Operation).To(Equal(controllers.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
g.Expect(clusterOp.Metadata).To(Equal("1"), "StatefulSet scaling lock operation has the wrong metadata.")
})
By("Undo the scale down because the replicas cannot fit")
originalSolrCloud = solrCloud.DeepCopy()
solrCloud.Spec.Replicas = pointer.Int32(int32(2))
Expect(k8sClient.Patch(ctx, solrCloud, client.MergeFrom(originalSolrCloud))).To(Succeed(), "Could not patch SolrCloud replicas to cancel scale down")
By("Make sure the scaleDown attempts for a minute until it times out")
// The scaleDown will timeout after a minute, so we have to wait a bit over a minute
expectStatefulSetWithConsistentChecksAndDuration(ctx, solrCloud, solrCloud.StatefulSetName(), time.Second*50, func(g Gomega, found *appsv1.StatefulSet) {
clusterOp, err := controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a scaleDown lock.")
g.Expect(clusterOp.Operation).To(Equal(controllers.ScaleDownLock), "StatefulSet does not have a scaleDown lock.")
g.Expect(clusterOp.Metadata).To(Equal("1"), "StatefulSet scaleDown lock operation has the wrong metadata.")
})
By("Make sure that the operation is changed to a balanceReplicas to redistribute replicas")
// The scaleDown will timeout after a minute, so we have to wait a bit over a minute
expectStatefulSetWithChecksAndTimeout(ctx, solrCloud, solrCloud.StatefulSetName(), time.Second*30, time.Millisecond*10, func(g Gomega, found *appsv1.StatefulSet) {
clusterOp, err := controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).ToNot(BeNil(), "StatefulSet does not have a balanceReplicas lock.")
g.Expect(clusterOp.Operation).To(Equal(controllers.BalanceReplicasLock), "StatefulSet does not have a balanceReplicas lock.")
g.Expect(clusterOp.Metadata).To(Equal("UndoFailedScaleDown"), "StatefulSet balanceReplicas lock operation has the wrong metadata.")
})
By("waiting for the balanceReplicas to finish")
statefulSet := expectStatefulSetWithChecks(ctx, solrCloud, solrCloud.StatefulSetName(), func(g Gomega, found *appsv1.StatefulSet) {
clusterOp, err := controllers.GetCurrentClusterOp(found)
g.Expect(err).ToNot(HaveOccurred(), "Error occurred while finding clusterLock for SolrCloud")
g.Expect(clusterOp).To(BeNil(), "StatefulSet should not have a balanceReplicas lock after balancing is complete.")
})
Expect(statefulSet.Spec.Replicas).To(HaveValue(BeEquivalentTo(2)), "After everything, the statefulset should be configured to have 2 pods")
Expect(statefulSet.Status.Replicas).To(HaveValue(BeEquivalentTo(2)), "After everything, the statefulset should have 2 pods running")
queryCollection(ctx, solrCloud, solrCollection1, 0)
})
})
})