Make PodDisruptionBudget an optional feature (#540)
diff --git a/api/v1beta1/solrcloud_types.go b/api/v1beta1/solrcloud_types.go
index c8b77c6..3e97b01 100644
--- a/api/v1beta1/solrcloud_types.go
+++ b/api/v1beta1/solrcloud_types.go
@@ -91,6 +91,10 @@
// +optional
UpdateStrategy SolrUpdateStrategy `json:"updateStrategy,omitempty"`
+ // Define how Solr nodes should be available.
+ // +optional
+ Availability SolrAvailabilityOptions `json:"availability,omitempty"`
+
// +optional
BusyBoxImage *ContainerImage `json:"busyBoxImage,omitempty"`
@@ -709,7 +713,7 @@
}
// SolrUpdateMethod is a string enumeration type that enumerates
-// all possible ways that a SolrCloud can having rolling updates managed.
+// all possible ways that a SolrCloud can have rolling updates managed.
// +kubebuilder:validation:Enum=Managed;StatefulSet;Manual
type SolrUpdateMethod string
@@ -735,7 +739,7 @@
return changed
}
-// Spec to control the desired behavior of managed rolling update.
+// ManagedUpdateOptions control the desired behavior of managed rolling update.
type ManagedUpdateOptions struct {
// The maximum number of pods that can be unavailable during the update.
@@ -759,6 +763,33 @@
MaxShardReplicasUnavailable *intstr.IntOrString `json:"maxShardReplicasUnavailable,omitempty"`
}
+type SolrAvailabilityOptions struct {
+ // Define PodDisruptionBudget(s) to ensure availability of Solr
+ // +optional
+ PodDisruptionBudget SolrPodDisruptionBudgetOptions `json:"podDisruptionBudget,omitempty"`
+}
+
+type SolrPodDisruptionBudgetOptions struct {
+ // Whether the operator should create PodDisruptionBudget(s)
+ // +kubebuilder:default=true
+ Enabled *bool `json:"enabled"`
+
+ // What method should be used when creating PodDisruptionBudget(s)
+ // +kubebuilder:default=ClusterWide
+ Method SolrPodDisruptionBudgetMethod `json:"method,omitempty"`
+}
+
+// SolrPodDisruptionBudgetMethod is a string enumeration type that enumerates
+// all possible ways that a SolrCloud can have PodDisruptionBudgets managed.
+// +kubebuilder:validation:Enum=ClusterWide
+type SolrPodDisruptionBudgetMethod string
+
+const (
+ // ClusterWidePDB will result in a single cluster-wide PDB being created to ensure availability of the SolrCloud.
+ // This will not take replica/shard readiness into account.
+ ClusterWidePDB SolrPodDisruptionBudgetMethod = "ClusterWide"
+)
+
// ZookeeperRef defines the zookeeper ensemble for solr to connect to
// If no ConnectionString is provided, the solr-cloud controller will create and manage an internal ensemble
type ZookeeperRef struct {
diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go
index 8f48453..c3e2014 100644
--- a/api/v1beta1/zz_generated.deepcopy.go
+++ b/api/v1beta1/zz_generated.deepcopy.go
@@ -649,6 +649,22 @@
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SolrAvailabilityOptions) DeepCopyInto(out *SolrAvailabilityOptions) {
+ *out = *in
+ in.PodDisruptionBudget.DeepCopyInto(&out.PodDisruptionBudget)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SolrAvailabilityOptions.
+func (in *SolrAvailabilityOptions) DeepCopy() *SolrAvailabilityOptions {
+ if in == nil {
+ return nil
+ }
+ out := new(SolrAvailabilityOptions)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SolrBackup) DeepCopyInto(out *SolrBackup) {
*out = *in
out.TypeMeta = in.TypeMeta
@@ -890,6 +906,7 @@
in.CustomSolrKubeOptions.DeepCopyInto(&out.CustomSolrKubeOptions)
in.SolrAddressability.DeepCopyInto(&out.SolrAddressability)
in.UpdateStrategy.DeepCopyInto(&out.UpdateStrategy)
+ in.Availability.DeepCopyInto(&out.Availability)
if in.BusyBoxImage != nil {
in, out := &in.BusyBoxImage, &out.BusyBoxImage
*out = new(ContainerImage)
@@ -1069,6 +1086,26 @@
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SolrPodDisruptionBudgetOptions) DeepCopyInto(out *SolrPodDisruptionBudgetOptions) {
+ *out = *in
+ if in.Enabled != nil {
+ in, out := &in.Enabled, &out.Enabled
+ *out = new(bool)
+ **out = **in
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SolrPodDisruptionBudgetOptions.
+func (in *SolrPodDisruptionBudgetOptions) DeepCopy() *SolrPodDisruptionBudgetOptions {
+ if in == nil {
+ return nil
+ }
+ out := new(SolrPodDisruptionBudgetOptions)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SolrPrometheusExporter) DeepCopyInto(out *SolrPrometheusExporter) {
*out = *in
out.TypeMeta = in.TypeMeta
diff --git a/config/crd/bases/solr.apache.org_solrclouds.yaml b/config/crd/bases/solr.apache.org_solrclouds.yaml
index c0cbb16..4d3cf1c 100644
--- a/config/crd/bases/solr.apache.org_solrclouds.yaml
+++ b/config/crd/bases/solr.apache.org_solrclouds.yaml
@@ -89,6 +89,27 @@
items:
type: string
type: array
+ availability:
+ description: Define how Solr nodes should be available.
+ properties:
+ podDisruptionBudget:
+ description: Define PodDisruptionBudget(s) to ensure availability
+ of Solr
+ properties:
+ enabled:
+ default: true
+ description: Whether the operator should create PodDisruptionBudget(s)
+ type: boolean
+ method:
+ default: ClusterWide
+ description: What method should be used when creating PodDisruptionBudget(s)
+ enum:
+ - ClusterWide
+ type: string
+ required:
+ - enabled
+ type: object
+ type: object
backupRepositories:
description: Allows specification of multiple different "repositories"
for Solr to use when backing up data.
diff --git a/controllers/controller_utils_test.go b/controllers/controller_utils_test.go
index b7349cd..899c33f 100644
--- a/controllers/controller_utils_test.go
+++ b/controllers/controller_utils_test.go
@@ -360,10 +360,17 @@
return expectPodDisruptionBudgetWithChecks(ctx, parentResource, podDisruptionBudgetName, selector, maxUnavailable, nil, resolveOffset(additionalOffset))
}
+func expectNoPodDisruptionBudget(ctx context.Context, parentResource client.Object, podDisruptionBudgetName string, additionalOffset ...int) {
+ podDisruptionBudget := &policyv1.PodDisruptionBudget{}
+ EventuallyWithOffset(resolveOffset(additionalOffset), func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, resourceKey(parentResource, podDisruptionBudgetName), podDisruptionBudget)).To(MatchError("poddisruptionbudgets.policy \""+podDisruptionBudgetName+"\" not found"), "Expected pdb to eventually not exist")
+ }).Should(Succeed())
+}
+
func expectPodDisruptionBudgetWithChecks(ctx context.Context, parentResource client.Object, podDisruptionBudgetName string, selector *metav1.LabelSelector, maxUnavailable intstr.IntOrString, additionalChecks func(Gomega, *policyv1.PodDisruptionBudget), additionalOffset ...int) *policyv1.PodDisruptionBudget {
podDisruptionBudget := &policyv1.PodDisruptionBudget{}
EventuallyWithOffset(resolveOffset(additionalOffset), func(g Gomega) {
- g.Expect(k8sClient.Get(ctx, resourceKey(parentResource, podDisruptionBudgetName), podDisruptionBudget)).To(Succeed(), "Expected ConfigMap does not exist")
+ g.Expect(k8sClient.Get(ctx, resourceKey(parentResource, podDisruptionBudgetName), podDisruptionBudget)).To(Succeed(), "Expected PodDisruptionBudget does not exist")
// Verify the PodDisruptionBudget Spec
g.Expect(podDisruptionBudget.Spec.Selector).To(Equal(selector), "PodDisruptionBudget does not have the correct selector.")
diff --git a/controllers/solrcloud_controller.go b/controllers/solrcloud_controller.go
index d0e3363..e1ac4e3 100644
--- a/controllers/solrcloud_controller.go
+++ b/controllers/solrcloud_controller.go
@@ -464,31 +464,37 @@
}
}
- // PodDistruptionBudget(s)
+ // Upsert or delete the solrcloud-wide PodDisruptionBudget based on the 'Enabled' flag; an unset (nil) flag means enabled, matching the documented default, because CRD defaulting is skipped when 'availability' is absent.
pdb := util.GeneratePodDisruptionBudget(instance, pvcLabelSelector)
+ if pdbEnabled := instance.Spec.Availability.PodDisruptionBudget.Enabled; pdbEnabled == nil || *pdbEnabled {
+ // Check if the PodDisruptionBudget already exists
+ pdbLogger := logger.WithValues("podDisruptionBudget", pdb.Name)
+ foundPDB := &policyv1.PodDisruptionBudget{}
+ err = r.Get(ctx, types.NamespacedName{Name: pdb.Name, Namespace: pdb.Namespace}, foundPDB)
+ if err != nil && errors.IsNotFound(err) {
+ pdbLogger.Info("Creating PodDisruptionBudget")
+ if err = controllerutil.SetControllerReference(instance, pdb, r.Scheme); err == nil {
+ err = r.Create(ctx, pdb)
+ }
+ } else if err == nil {
+ var needsUpdate bool
+ needsUpdate, err = util.OvertakeControllerRef(instance, foundPDB, r.Scheme)
+ needsUpdate = util.CopyPodDisruptionBudgetFields(pdb, foundPDB, pdbLogger) || needsUpdate
- // Check if the PodDistruptionBudget already exists
- pdbLogger := logger.WithValues("podDisruptionBudget", pdb.Name)
- foundPDB := &policyv1.PodDisruptionBudget{}
- err = r.Get(ctx, types.NamespacedName{Name: pdb.Name, Namespace: pdb.Namespace}, foundPDB)
- if err != nil && errors.IsNotFound(err) {
- pdbLogger.Info("Creating PodDisruptionBudget")
- if err = controllerutil.SetControllerReference(instance, pdb, r.Scheme); err == nil {
- err = r.Create(ctx, pdb)
+ // Update the found PodDisruptionBudget and write the result back if there are any changes
+ if needsUpdate && err == nil {
+ pdbLogger.Info("Updating PodDisruptionBudget")
+ err = r.Update(ctx, foundPDB)
+ }
}
- } else if err == nil {
- var needsUpdate bool
- needsUpdate, err = util.OvertakeControllerRef(instance, foundPDB, r.Scheme)
- needsUpdate = util.CopyPodDisruptionBudgetFields(pdb, foundPDB, pdbLogger) || needsUpdate
-
- // Update the found PodDistruptionBudget and write the result back if there are any changes
- if needsUpdate && err == nil {
- pdbLogger.Info("Updating PodDisruptionBudget")
- err = r.Update(ctx, foundPDB)
+ if err != nil {
+ return requeueOrNot, err
}
- }
- if err != nil {
- return requeueOrNot, err
+ } else { // PDB is disabled, make sure that we delete any previously created pdb that might exist.
+ err = r.Client.Delete(ctx, pdb)
+ if err != nil && !errors.IsNotFound(err) {
+ return requeueOrNot, err
+ }
}
extAddressabilityOpts := instance.Spec.SolrAddressability.External
diff --git a/controllers/solrcloud_controller_test.go b/controllers/solrcloud_controller_test.go
index d1337bc..52694de 100644
--- a/controllers/solrcloud_controller_test.go
+++ b/controllers/solrcloud_controller_test.go
@@ -33,6 +33,11 @@
"strings"
)
+func newBoolPtr(value bool) *bool {
+ newBool := value
+ return &newBool
+}
+
var _ = FDescribe("SolrCloud controller - General", func() {
var (
ctx context.Context
@@ -88,6 +93,12 @@
InitContainers: extraContainers2,
},
},
+ Availability: solrv1beta1.SolrAvailabilityOptions{
+ PodDisruptionBudget: solrv1beta1.SolrPodDisruptionBudgetOptions{
+ Enabled: newBoolPtr(true),
+ Method: "ClusterWide",
+ },
+ },
}
})
FIt("has the correct resources", func() {
@@ -151,6 +162,11 @@
By("testing the PodDisruptionBudget")
expectPodDisruptionBudget(ctx, solrCloud, solrCloud.StatefulSetName(), statefulSet.Spec.Selector, intstr.FromString(util.DefaultMaxPodsUnavailable))
+ expectSolrCloudWithChecks(ctx, solrCloud, func(g Gomega, found *solrv1beta1.SolrCloud) {
+ found.Spec.Availability.PodDisruptionBudget.Enabled = newBoolPtr(false)
+ g.Expect(k8sClient.Update(ctx, found)).To(Succeed(), "Disable the PDB for the solrcloud")
+ })
+ expectNoPodDisruptionBudget(ctx, solrCloud, solrCloud.StatefulSetName())
})
})
@@ -176,6 +192,12 @@
},
RestartSchedule: "@every 30m",
},
+ Availability: solrv1beta1.SolrAvailabilityOptions{
+ PodDisruptionBudget: solrv1beta1.SolrPodDisruptionBudgetOptions{
+ Enabled: newBoolPtr(false),
+ Method: "ClusterWide",
+ },
+ },
SolrGCTune: "gc Options",
CustomSolrKubeOptions: solrv1beta1.CustomSolrKubeOptions{
PodOptions: &solrv1beta1.PodOptions{
@@ -291,7 +313,7 @@
Expect(*headlessService.Spec.Ports[0].AppProtocol).To(Equal("http"), "Wrong appProtocol on headless Service")
By("testing the PodDisruptionBudget")
- expectPodDisruptionBudget(ctx, solrCloud, solrCloud.StatefulSetName(), statefulSet.Spec.Selector, three)
+ expectNoPodDisruptionBudget(ctx, solrCloud, solrCloud.StatefulSetName())
})
})
@@ -335,6 +357,10 @@
Expect(statefulSet.Annotations).To(Equal(expectedStatefulSetAnnotations), "Incorrect statefulSet annotations")
Expect(statefulSet.Spec.Template.Spec.Containers[0].Lifecycle.PostStart.Exec.Command).To(ConsistOf("sh", "-c", "solr zk ls ${ZK_CHROOT} -z ${ZK_SERVER} || solr zk mkroot ${ZK_CHROOT} -z ${ZK_SERVER}"), "Incorrect post-start command")
Expect(statefulSet.Spec.Template.Spec.ServiceAccountName).To(BeEmpty(), "No custom serviceAccountName specified, so the field should be empty.")
+
+ // PodDisruptionBudget creation should be enabled by default
+ By("testing the PodDisruptionBudget")
+ expectPodDisruptionBudget(ctx, solrCloud, solrCloud.StatefulSetName(), statefulSet.Spec.Selector, intstr.FromString(util.DefaultMaxPodsUnavailable))
})
})
diff --git a/docs/solr-cloud/solr-cloud-crd.md b/docs/solr-cloud/solr-cloud-crd.md
index 47dbce7..5596cce 100644
--- a/docs/solr-cloud/solr-cloud-crd.md
+++ b/docs/solr-cloud/solr-cloud-crd.md
@@ -99,8 +99,17 @@
### Pod Disruption Budgets
_Since v0.7.0_
-The Solr Operator will create a [`PodDisruptionBudget`](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#pod-disruption-budgets) to ensure that Kubernetes does not take down more than acceptable amount of SolrCloud nodes at a time.
-The PDB's `maxUnavailable` setting is populated from the `maxPodsUnavailable` setting in `SolrCloud.Spec.updateStrategy.managed`.
+The Solr Operator can optionally create a [`PodDisruptionBudget`](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#pod-disruption-budgets) to ensure that Kubernetes does not take down more than an acceptable number of SolrCloud nodes at a time.
+This behavior is controlled by the `.spec.availability.podDisruptionBudget.enabled` setting, which defaults to "true" but can be disabled if desired as in the snippet below:
+
+```yaml
+spec:
+ availability:
+ podDisruptionBudget:
+ enabled: false
+```
+
+When not disabled, the PDB's `maxUnavailable` setting is populated from the `maxPodsUnavailable` setting in `SolrCloud.Spec.updateStrategy.managed`.
If this option is not set, it will use the default value (`25%`).
Currently, the implementation does not take shard/replica topology into account, like the update strategy does.
diff --git a/docs/upgrade-notes.md b/docs/upgrade-notes.md
index 34c69a3..4a12451 100644
--- a/docs/upgrade-notes.md
+++ b/docs/upgrade-notes.md
@@ -120,9 +120,10 @@
Please refer to the [Zookeeper Operator release notes](https://github.com/pravega/zookeeper-operator/releases) before upgrading.
Make sure to install the correct version of the Zookeeper Operator CRDs, as [shown above](#upgrading-the-zookeeper-operator).
-- `PodDisruptionBudgets` are now created alongside SolrCloud instances.
+- `PodDisruptionBudgets` are now created by default alongside SolrCloud instances.
The maximum number of pods allowed down at any given time is aligned with the [Managed Update settings](solr-cloud/solr-cloud-crd.md#update-strategy) provided in the spec.
If this is not provided, the default setting (`25%`) is used.
+ `PodDisruptionBudget` creation can be disabled for an individual SolrCloud resource by setting `spec.availability.podDisruptionBudget.enabled` to `false`.
- Provided Zookeeper pods use the `IfNotPresent` pullPolicy by default. Users that specify this field manually will not see a change.
diff --git a/helm/solr-operator/Chart.yaml b/helm/solr-operator/Chart.yaml
index 4ae88f2..3142875 100644
--- a/helm/solr-operator/Chart.yaml
+++ b/helm/solr-operator/Chart.yaml
@@ -162,6 +162,13 @@
url: https://github.com/apache/solr-operator/issues/537
- name: GitHub PR
url: https://github.com/apache/solr-operator/pull/548
+ - kind: added
+ description: Added an option to enable or disable PodDisruptionBudget creation for a SolrCloud.
+ links:
+ - name: GitHub Issue
+ url: https://github.com/apache/solr-operator/issues/538
+ - name: GitHub PR
+ url: https://github.com/apache/solr-operator/pull/540
artifacthub.io/images: |
- name: solr-operator
image: apache/solr-operator:v0.7.0-prerelease
diff --git a/helm/solr-operator/crds/crds.yaml b/helm/solr-operator/crds/crds.yaml
index b0292e5..8ac1456 100644
--- a/helm/solr-operator/crds/crds.yaml
+++ b/helm/solr-operator/crds/crds.yaml
@@ -338,6 +338,27 @@
items:
type: string
type: array
+ availability:
+ description: Define how Solr nodes should be available.
+ properties:
+ podDisruptionBudget:
+ description: Define PodDisruptionBudget(s) to ensure availability
+ of Solr
+ properties:
+ enabled:
+ default: true
+ description: Whether the operator should create PodDisruptionBudget(s)
+ type: boolean
+ method:
+ default: ClusterWide
+ description: What method should be used when creating PodDisruptionBudget(s)
+ enum:
+ - ClusterWide
+ type: string
+ required:
+ - enabled
+ type: object
+ type: object
backupRepositories:
description: Allows specification of multiple different "repositories"
for Solr to use when backing up data.
diff --git a/helm/solr/README.md b/helm/solr/README.md
index 7c74142..df858f0 100644
--- a/helm/solr/README.md
+++ b/helm/solr/README.md
@@ -107,6 +107,8 @@
| updateStrategy.managedUpdate.maxPodsUnavailable | int-or-string | `"25%"` | The number of Solr pods in a Solr Cloud that are allowed to be unavailable during the rolling restart. Either a static number, or a percentage representing the percentage of total pods requested for the statefulSet. |
| updateStrategy.managedUpdate.maxShardReplicasUnavailable | int-or-string | `1` | The number of replicas for each shard allowed to be unavailable during the restart. Either a static number, or a percentage representing the percentage of the number of replicas for a shard. |
| updateStrategy.restartSchedule | [string (CRON)](https://pkg.go.dev/github.com/robfig/cron/v3?utm_source=godoc#hdr-CRON_Expression_Format) | | A CRON schedule for automatically restarting the Solr Cloud. [Refer here](https://pkg.go.dev/github.com/robfig/cron/v3?utm_source=godoc#hdr-CRON_Expression_Format) for all possible CRON syntaxes accepted. |
+| availability.podDisruptionBudget.enabled | boolean | `true` | Create [PodDisruptionBudget(s)](https://kubernetes.io/docs/tasks/run-application/configure-pdb/) to ensure the availability of SolrNodes. |
+| availability.podDisruptionBudget.method | string | `"ClusterWide"` | The method by which PodDisruptionBudgets should be created. The only option currently is `ClusterWide`. |
| serviceAccount.create | boolean | `false` | Create a serviceAccount to be used for all pods being deployed (Solr & ZK). If `serviceAccount.name` is not specified, the full name of the deployment will be used. |
| serviceAccount.name | string | | The optional default service account used for Solr and ZK unless overridden below. If `serviceAccount.create` is set to `false`, this serviceAccount must exist in the target namespace. |
| backupRepositories | []object | | A list of BackupRepositories to connect your SolrCloud to. Visit the [SolrBackup docs](https://apache.github.io/solr-operator/docs/solr-backup) or run `kubectl explain solrcloud.spec.backupRepositories` to see the available options. |
diff --git a/helm/solr/templates/solrcloud.yaml b/helm/solr/templates/solrcloud.yaml
index 294c298..983210e 100644
--- a/helm/solr/templates/solrcloud.yaml
+++ b/helm/solr/templates/solrcloud.yaml
@@ -108,6 +108,12 @@
{{- end }}
{{- end }}
+ {{- if and (.Values.availability) (.Values.availability.podDisruptionBudget) }}
+ availability:
+ podDisruptionBudget:
+ {{- toYaml .Values.availability.podDisruptionBudget | nindent 6 }}
+ {{- end }}
+
{{- if .Values.dataStorage }}
dataStorage:
{{- if eq .Values.dataStorage.type "persistent" }}
diff --git a/helm/solr/values.yaml b/helm/solr/values.yaml
index 3b02912..1e1ac1f 100644
--- a/helm/solr/values.yaml
+++ b/helm/solr/values.yaml
@@ -136,6 +136,13 @@
annotations: {}
storageClassName: ""
+# How to control availability for Solr Nodes
+availability:
+ # Create PodDisruptionBudget(s) to ensure availability of Solr Nodes
+ podDisruptionBudget:
+ enabled: true
+ method: ClusterWide
+
# A list of BackupRepositories to connect your SolrCloud to
# See either for more information:
# - https://apache.github.io/solr-operator/docs/solr-backup