| // Copyright Istio Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Package metrics defines metrics and monitoring functionality |
| // used throughout operator. |
| package metrics |
| |
| import ( |
| "istio.io/pkg/monitoring" |
| ) |
| |
| var ( |
| // OperatorVersionLabel describes version of running binary. |
| OperatorVersionLabel = monitoring.MustCreateLabel("version") |
| |
| // MergeErrorLabel describes the type of merge error. |
| MergeErrorLabel = monitoring.MustCreateLabel("error_type") |
| |
| // RenderErrorLabel describes the type of the error while rendering. |
| RenderErrorLabel = monitoring.MustCreateLabel("render_error") |
| |
| // CRFetchErrorReasonLabel describes the reason/HTTP code |
| // for failing to fetch CR. |
| CRFetchErrorReasonLabel = monitoring.MustCreateLabel("reason") |
| |
| // ComponentNameLabel represents istio component name - like |
| // core, pilot, istio-cni etc. |
| ComponentNameLabel = monitoring.MustCreateLabel("component") |
| |
| // ResourceKindLabel indicates the kind of resource owned |
| // or created or updated or deleted or pruned by operator. |
| ResourceKindLabel = monitoring.MustCreateLabel("kind") |
| ) |
| |
| // MergeErrorType describes the class of errors that could |
| // occur while merging profile, user supplied YAML, values |
| // overridden by --set and so on. |
| type MergeErrorType string |
| |
| const ( |
| // CannotFetchProfileError occurs when profile cannot be found. |
| CannotFetchProfileError MergeErrorType = "cannot_fetch_profile" |
| |
| // OverlayError overlaying YAMLs to combine profile, user |
| // defined settings in CR, Hub-tag etc fails. |
| OverlayError MergeErrorType = "overlay" |
| |
| // IOPFormatError occurs when supplied CR cannot be marshaled |
| // or unmarshaled to/from YAML. |
| IOPFormatError MergeErrorType = "iop_format" |
| |
| // TranslateValuesError occurs when translating from legacy API fails. |
| TranslateValuesError MergeErrorType = "translate_values" |
| |
| // InternalYAMLParseError occurs when spec section in merged CR |
| // cannot be accessed for some reason (either missing or multiple). |
| InternalYAMLParseError MergeErrorType = "internal_yaml_parse" |
| ) |
| |
| // RenderErrorType describes the class of errors that could |
| // occur while rendering Kubernetes manifest from given CR. |
| type RenderErrorType string |
| |
| const ( |
| RenderNotStartedError RenderErrorType = "render_not_started" |
| |
| // HelmTranslateIOPToValuesError describes render error where renderer for |
| // a component cannot create values.yaml tree from given CR. |
| HelmTranslateIOPToValuesError RenderErrorType = "helm_translate_iop_to_values" |
| |
| // HelmChartRenderError describes error where Helm charts cannot be rendered |
| // for the generated values.yaml tree. |
| HelmChartRenderError RenderErrorType = "helm_chart_render" |
| |
| // K8SSettingsOverlayError describes the K8s overlay error after |
| // rendering Helm charts successfully. |
| K8SSettingsOverlayError RenderErrorType = "k8s_settings_overlay" |
| |
| // K8SManifestPatchError describes errors while patching generated manifest. |
| K8SManifestPatchError RenderErrorType = "k8s_manifest_patch" |
| ) |
| |
| var ( |
| // Version is the version of the operator binary running currently. |
| // This is required for fleet level metrics although it is available from |
| // ControlZ (more precisely versionz endpoint). |
| Version = monitoring.NewGauge( |
| "version", |
| "Version of operator binary", |
| monitoring.WithLabels(OperatorVersionLabel), |
| ) |
| |
| // GetCRErrorTotal counts the number of times fetching |
| // CR fails from API server. |
| GetCRErrorTotal = monitoring.NewSum( |
| "get_cr_error_total", |
| "Number of times fetching CR from apiserver failed", |
| monitoring.WithLabels(CRFetchErrorReasonLabel), |
| ) |
| |
| // CRMergeFailureTotal counts number of CR merge failures. |
| CRMergeFailureTotal = monitoring.NewSum( |
| "cr_merge_failure_total", |
| "Number of IstioOperator CR merge failures", |
| monitoring.WithLabels(MergeErrorLabel), |
| ) |
| |
| // CRDeletionTotal counts the number of times |
| // IstioOperator CR was deleted. |
| CRDeletionTotal = monitoring.NewSum( |
| "cr_deletion_total", |
| "Number of IstioOperator CR deleted", |
| ) |
| |
| // CRValidationErrorTotal counts the number of CR |
| // validation failures. |
| CRValidationErrorTotal = monitoring.NewSum( |
| "cr_validation_error_total", |
| "Number of IstioOperator CR validation failures", |
| ) |
| |
| // RenderManifestTotal counts the number of manifest |
| // renders at each component level. |
| RenderManifestTotal = monitoring.NewSum( |
| "render_manifest_total", |
| "Number of component manifests rendered", |
| monitoring.WithLabels(ComponentNameLabel), |
| ) |
| |
| // OwnedResourceTotal indicates the number of resources |
| // currently owned by the CR with given name and revision. |
| OwnedResourceTotal = monitoring.NewGauge( |
| "owned_resource_total", |
| "Number of resources currently owned by the operator", |
| monitoring.WithLabels(ResourceKindLabel), |
| ) |
| |
| // ResourceCreationTotal indicates the number of resources |
| // created by the operator for a CR and revision. |
| ResourceCreationTotal = monitoring.NewSum( |
| "resource_creation_total", |
| "Number of resources created by the operator", |
| monitoring.WithLabels(ResourceKindLabel), |
| ) |
| |
| // ResourceUpdateTotal indicates the number of resources updated by |
| // the operator in response to CR updates for a revision. |
| ResourceUpdateTotal = monitoring.NewSum( |
| "resource_update_total", |
| "Number of resources updated by the operator", |
| monitoring.WithLabels(ResourceKindLabel), |
| ) |
| |
| // ResourceDeletionTotal indicates the number of resources deleted |
| // by the operator in response to CR update or delete operation (like |
| // ingress-gateway which was enabled could be disabled and this requires |
| // deleting ingress-gateway deployment). |
| ResourceDeletionTotal = monitoring.NewSum( |
| "resource_deletion_total", |
| "Number of resources deleted by the operator", |
| monitoring.WithLabels(ResourceKindLabel), |
| ) |
| |
| // ResourcePruneTotal indicates the resources pruned as a result of update. |
| ResourcePruneTotal = monitoring.NewSum( |
| "resource_prune_total", |
| "Number of resources pruned by the operator", |
| monitoring.WithLabels(ResourceKindLabel), |
| ) |
| |
| // ManifestPatchErrorTotal counts the total number of K8S patch errors. |
| ManifestPatchErrorTotal = monitoring.NewSum( |
| "manifest_patch_error_total", |
| "Number of times K8S patch overlays failed", |
| ) |
| |
| // ManifestRenderErrorTotal counts errors occurred while rendering manifest. |
| ManifestRenderErrorTotal = monitoring.NewSum( |
| "manifest_render_error_total", |
| "Number of times error occurred during rendering output manifest", |
| monitoring.WithLabels(ComponentNameLabel, RenderErrorLabel), |
| ) |
| |
| // LegacyPathTranslationTotal counts the translations from legacy API to new one. |
| LegacyPathTranslationTotal = monitoring.NewSum( |
| "legacy_path_translation_total", |
| "Number of times a legacy API path is translated", |
| ) |
| |
| // CacheFlushTotal counts number of cache flushes. |
| CacheFlushTotal = monitoring.NewSum( |
| "cache_flush_total", |
| "number of times operator cache was flushed", |
| ) |
| ) |
| |
| func init() { |
| monitoring.MustRegister( |
| Version, |
| |
| GetCRErrorTotal, |
| CRMergeFailureTotal, |
| CRValidationErrorTotal, |
| CRDeletionTotal, |
| RenderManifestTotal, |
| |
| OwnedResourceTotal, |
| ResourceCreationTotal, |
| ResourceUpdateTotal, |
| ResourceDeletionTotal, |
| ResourcePruneTotal, |
| |
| ManifestPatchErrorTotal, |
| ManifestRenderErrorTotal, |
| LegacyPathTranslationTotal, |
| CacheFlushTotal, |
| ) |
| |
| initOperatorCrdResourceMetrics() |
| } |