diff --git a/base-values/infra.yaml b/base-values/infra.yaml index 262685a..80120be 100644 --- a/base-values/infra.yaml +++ b/base-values/infra.yaml @@ -17,6 +17,9 @@ components: name: "flux-system" skip_create: true values: + prometheus: + kubeStateMetricsConfig: + namespace: "monitoring" grafana: dashboards: annotations: @@ -46,6 +49,12 @@ components: enabled: true namespace: name: "monitoring" + values: + prometheus: + valuesFrom: + - kind: ConfigMap + name: fluxcd-kube-state-metrics-config + valuesKey: kube-state-metrics-config.yaml infra-trivy: enabled: true diff --git a/infra-fluxcd/README.adoc b/infra-fluxcd/README.adoc index 25d1397..606499a 100644 --- a/infra-fluxcd/README.adoc +++ b/infra-fluxcd/README.adoc @@ -50,6 +50,11 @@ image::https://img.shields.io/badge/Version-application-informational?style=flat | int | `0` | + +| prometheus.kubeStateMetricsConfig.namespace +| string +| `nil` +| |=== Autogenerated from chart metadata using https://github.com/norwoodj/helm-docs[helm-docs] diff --git a/infra-fluxcd/grafana_dashboards/cluster.json b/infra-fluxcd/files/grafana_dashboards/cluster.json similarity index 100% rename from infra-fluxcd/grafana_dashboards/cluster.json rename to infra-fluxcd/files/grafana_dashboards/cluster.json diff --git a/infra-fluxcd/grafana_dashboards/control-plane.json b/infra-fluxcd/files/grafana_dashboards/control-plane.json similarity index 96% rename from infra-fluxcd/grafana_dashboards/control-plane.json rename to infra-fluxcd/files/grafana_dashboards/control-plane.json index 588c455..d47d391 100644 --- a/infra-fluxcd/grafana_dashboards/control-plane.json +++ b/infra-fluxcd/files/grafana_dashboards/control-plane.json @@ -1437,51 +1437,86 @@ "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { "uid": "${DS_PROMETHEUS}" }, - "decimals": 2, "description": "", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + 
}, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 70 }, - "hiddenSeries": false, "id": 5, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, "pluginVersion": "10.0.3", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": true, "targets": [ { "datasource": { @@ -1504,37 +1539,8 @@ "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Helm Releases ops/min", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1102", - "format": "opm", - "logBase": 1, - "show": true - }, - { - "$$hashKey": "object:1103", - "format": "short", - 
"logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { "datasource": { diff --git a/infra-fluxcd/grafana_dashboards/logs.json b/infra-fluxcd/files/grafana_dashboards/logs.json similarity index 100% rename from infra-fluxcd/grafana_dashboards/logs.json rename to infra-fluxcd/files/grafana_dashboards/logs.json diff --git a/infra-fluxcd/files/kube-state-metrics-config.yaml b/infra-fluxcd/files/kube-state-metrics-config.yaml new file mode 100644 index 0000000..e0694ad --- /dev/null +++ b/infra-fluxcd/files/kube-state-metrics-config.yaml @@ -0,0 +1,275 @@ +kube-state-metrics: + # For kube-prometheus-stacks that are already installed and configured with + # custom collectors, commenting out the collectors and extraArgs below will + # retain any existing kube-state-metrics configuration. + collectors: [ ] + extraArgs: + - --custom-resource-state-only=true + rbac: + extraRules: + - apiGroups: + - source.toolkit.fluxcd.io + - kustomize.toolkit.fluxcd.io + - helm.toolkit.fluxcd.io + - notification.toolkit.fluxcd.io + - image.toolkit.fluxcd.io + resources: + - gitrepositories + - buckets + - helmrepositories + - helmcharts + - ocirepositories + - kustomizations + - helmreleases + - alerts + - providers + - receivers + - imagerepositories + - imagepolicies + - imageupdateautomations + verbs: [ "list", "watch" ] + customResourceState: + enabled: true + config: + spec: + resources: + - groupVersionKind: + group: kustomize.toolkit.fluxcd.io + version: v1 + kind: Kustomization + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." 
+ each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, lastAppliedRevision ] + source_name: [ spec, sourceRef, name ] + - groupVersionKind: + group: helm.toolkit.fluxcd.io + version: v2beta2 + kind: HelmRelease + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, lastAppliedRevision ] + chart_name: [ spec, chart, spec, chart ] + chart_source_name: [ spec, chart, spec, sourceRef, name ] + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1 + kind: GitRepository + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, artifact, revision ] + url: [ spec, url ] + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1beta2 + kind: Bucket + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." 
+ each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, artifact, revision ] + endpoint: [ spec, endpoint ] + bucket_name: [ spec, bucketName ] + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1beta2 + kind: HelmRepository + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, artifact, revision ] + url: [ spec, url ] + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1beta2 + kind: HelmChart + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, artifact, revision ] + chart_name: [ spec, chart ] + chart_version: [ spec, version ] + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1beta2 + kind: OCIRepository + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." 
+ each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, artifact, revision ] + url: [ spec, url ] + - groupVersionKind: + group: notification.toolkit.fluxcd.io + version: v1beta3 + kind: Alert + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + suspended: [ spec, suspend ] + - groupVersionKind: + group: notification.toolkit.fluxcd.io + version: v1beta3 + kind: Provider + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + suspended: [ spec, suspend ] + - groupVersionKind: + group: notification.toolkit.fluxcd.io + version: v1 + kind: Receiver + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + webhook_path: [ status, webhookPath ] + - groupVersionKind: + group: image.toolkit.fluxcd.io + version: v1beta2 + kind: ImageRepository + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." 
+ each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + image: [ spec, image ] + - groupVersionKind: + group: image.toolkit.fluxcd.io + version: v1beta2 + kind: ImagePolicy + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + source_name: [ spec, imageRepositoryRef, name ] + - groupVersionKind: + group: image.toolkit.fluxcd.io + version: v1beta1 + kind: ImageUpdateAutomation + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a GitOps Toolkit resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + source_name: [ spec, sourceRef, name ] diff --git a/infra-fluxcd/templates/configmap_grafana_dashboards.yaml b/infra-fluxcd/templates/configmap_grafana_dashboards.yaml index 834b145..98d90fc 100644 --- a/infra-fluxcd/templates/configmap_grafana_dashboards.yaml +++ b/infra-fluxcd/templates/configmap_grafana_dashboards.yaml @@ -1,4 +1,4 @@ -{{- range $path, $bytes := .Files.Glob "grafana_dashboards/*.json" }} +{{- range $path, $bytes := .Files.Glob "files/grafana_dashboards/*.json" }} --- apiVersion: v1 kind: ConfigMap diff --git a/infra-fluxcd/templates/configmap_kube-state-metrics-config.yaml b/infra-fluxcd/templates/configmap_kube-state-metrics-config.yaml new file mode 100644 index 0000000..370b63f --- /dev/null +++ b/infra-fluxcd/templates/configmap_kube-state-metrics-config.yaml @@ -0,0 +1,8 @@ 
+--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: fluxcd-kube-state-metrics-config + namespace: "{{ .Values.prometheus.kubeStateMetricsConfig.namespace | default .Release.Namespace }}" +data: + {{- (.Files.Glob "files/kube-state-metrics-config.yaml" ).AsConfig | nindent 2 }} diff --git a/infra-fluxcd/templates/prometheus-rule.yaml b/infra-fluxcd/templates/prometheus-rule.yaml index 2c86feb..43b19b3 100644 --- a/infra-fluxcd/templates/prometheus-rule.yaml +++ b/infra-fluxcd/templates/prometheus-rule.yaml @@ -1,5 +1,6 @@ -{{- if (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") }} --- +{{- if (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") }} +{{- $filter := "endpoint,instance,container,pod,namespace,job,service,chart_version,ready,customresource_group,customresource_version" }} apiVersion: "monitoring.coreos.com/v1" kind: "PrometheusRule" metadata: @@ -11,7 +12,31 @@ spec: - name: "FluxCD" rules: - alert: "FluxCD: source fetch failed" - expr: 'gotk_reconcile_condition{status="False",kind=~"GitRepository|HelmRepository|Bucket"} > 0' + expr: 'sum(gotk_resource_info{customresource_group="source.toolkit.fluxcd.io",ready!="True",ready=~".+"}) without ({{ $filter }}) > 0' + for: "5m" + labels: + severity: "warning" + {{` + annotations: + summary: "FluxCD has not fetched a source in {{ $labels.exported_namespace }} correct" + description: "FluxCD has not fetched the source {{ $labels.name }} of {{ $labels.customresource_kind }} in {{ $labels.exported_namespace }}" + `}} + + - alert: "FluxCD: install failed" + expr: 'sum(gotk_resource_info{customresource_group!="source.toolkit.fluxcd.io",ready!="True",ready=~".+"}) without ({{ $filter }}) > 0' {{- /* ready=~".+" skips kinds that expose no ready label (Alert, Provider), which ready!="True" alone would match */}} + for: "5m" + labels: + severity: "warning" + {{` + annotations: + summary: "FluxCD has not installed something in {{ $labels.exported_namespace }} correct" + description: "FluxCD has not installed {{ $labels.name }} of {{ $labels.customresource_kind }} in {{ $labels.exported_namespace }} correct" + `}} + + - name:
"FluxCD - legacy" + rules: + - alert: "FluxCD: source fetch failed" + expr: 'gotk_reconcile_condition{status="False",kind=~"GitRepository|HelmRepository|HelmChart|Bucket"} > 0' for: "5m" labels: severity: "warning" diff --git a/infra-fluxcd/values.yaml b/infra-fluxcd/values.yaml index af17846..b60dd6d 100644 --- a/infra-fluxcd/values.yaml +++ b/infra-fluxcd/values.yaml @@ -13,6 +13,10 @@ commons: rules: labels: {} +prometheus: + kubeStateMetricsConfig: + namespace: null + grafana: dashboards: annotations: {} diff --git a/infra-monitoring/templates/kube-prometheus-stack/release.yaml b/infra-monitoring/templates/kube-prometheus-stack/release.yaml index 48413d9..ccd520d 100644 --- a/infra-monitoring/templates/kube-prometheus-stack/release.yaml +++ b/infra-monitoring/templates/kube-prometheus-stack/release.yaml @@ -17,8 +17,22 @@ spec: upgrade: {{- toYaml .Values.commons.helm.release.upgrade | nindent 4 }} driftDetection: - {{- toYaml .Values.commons.helm.release.driftDetection | nindent 4 }} + {{- with .Values.commons.helm.release.driftDetection.mode }} + mode: {{ . | quote }} + {{- end }} + ignore: + - target: + kind: PrometheusRule + paths: + - "/metadata/annotations/prometheus-operator-validated" + {{- with .Values.commons.helm.release.driftDetection.ignore }} + {{- toYaml . | nindent 6 }} + {{- end }} interval: 10m + {{- with .Values.prometheus.valuesFrom }} + valuesFrom: + {{- toYaml . | nindent 4 }} + {{- end }} values: commonLabels: prometheus: "default"