fix(infra-fluxcd): new metrics

This commit is contained in:
WrenIX 2024-01-27 12:46:09 +01:00
parent b5d3256183
commit 706d2ca181
Signed by: wrenix
GPG key ID: 7AFDB012974B1BB5
11 changed files with 411 additions and 65 deletions

View file

@ -17,6 +17,9 @@ components:
name: "flux-system"
skip_create: true
values:
prometheus:
kubeStateMetricsConfig:
namespace: "monitoring"
grafana:
dashboards:
annotations:
@ -46,6 +49,12 @@ components:
enabled: true
namespace:
name: "monitoring"
values:
prometheus:
valuesFrom:
- kind: ConfigMap
name: fluxcd-kube-state-metrics-config
valuesKey: kube-state-metrics-config.yaml
infra-trivy:
enabled: true

View file

@ -50,6 +50,11 @@ image::https://img.shields.io/badge/Version-application-informational?style=flat
| int
| `0`
|
| prometheus.kubeStateMetricsConfig.namespace
| string
| `nil`
|
|===
Autogenerated from chart metadata using https://github.com/norwoodj/helm-docs[helm-docs]

View file

@ -1437,51 +1437,86 @@
"type": "timeseries"
},
{
"aliasColors": {},
"bars": true,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "${DS_PROMETHEUS}"
},
"decimals": 2,
"description": "",
"fill": 1,
"fillGradient": 0,
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "opm"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 70
},
"hiddenSeries": false,
"id": 5,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": true
},
"lines": false,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
"legend": {
"calcs": [
"mean",
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"percentage": false,
"pluginVersion": "10.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": true,
"targets": [
{
"datasource": {
@ -1504,37 +1539,8 @@
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Helm Releases ops/min",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:1102",
"format": "opm",
"logBase": 1,
"show": true
},
{
"$$hashKey": "object:1103",
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
"type": "timeseries"
},
{
"datasource": {

View file

@ -0,0 +1,275 @@
# Values overlay for kube-state-metrics: every Flux custom resource listed
# below is exported as a single Info metric built from the prefix `gotk` and
# the name `resource_info`.
kube-state-metrics:
  # For kube-prometheus-stacks that are already installed and configured with
  # custom collectors, commenting out the collectors and extraArgs below will
  # retain any existing kube-state-metrics configuration.
  collectors: []
  extraArgs:
    - --custom-resource-state-only=true
  rbac:
    extraRules:
      # list/watch on every Flux kind that has a metric definition below
      - apiGroups:
          - source.toolkit.fluxcd.io
          - kustomize.toolkit.fluxcd.io
          - helm.toolkit.fluxcd.io
          - notification.toolkit.fluxcd.io
          - image.toolkit.fluxcd.io
        resources:
          - gitrepositories
          - buckets
          - helmrepositories
          - helmcharts
          - ocirepositories
          - kustomizations
          - helmreleases
          - alerts
          - providers
          - receivers
          - imagerepositories
          - imagepolicies
          - imageupdateautomations
        verbs:
          - "list"
          - "watch"
  customResourceState:
    enabled: true
    config:
      spec:
        resources:
          # --- kustomize.toolkit.fluxcd.io ---
          - groupVersionKind:
              group: kustomize.toolkit.fluxcd.io
              version: v1
              kind: Kustomization
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, "[type=Ready]", status]
                  suspended: [spec, suspend]
                  revision: [status, lastAppliedRevision]
                  source_name: [spec, sourceRef, name]
          # --- helm.toolkit.fluxcd.io ---
          - groupVersionKind:
              group: helm.toolkit.fluxcd.io
              version: v2beta2
              kind: HelmRelease
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, "[type=Ready]", status]
                  suspended: [spec, suspend]
                  revision: [status, lastAppliedRevision]
                  chart_name: [spec, chart, spec, chart]
                  chart_source_name: [spec, chart, spec, sourceRef, name]
          # --- source.toolkit.fluxcd.io ---
          - groupVersionKind:
              group: source.toolkit.fluxcd.io
              version: v1
              kind: GitRepository
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, "[type=Ready]", status]
                  suspended: [spec, suspend]
                  revision: [status, artifact, revision]
                  url: [spec, url]
          - groupVersionKind:
              group: source.toolkit.fluxcd.io
              version: v1beta2
              kind: Bucket
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, "[type=Ready]", status]
                  suspended: [spec, suspend]
                  revision: [status, artifact, revision]
                  endpoint: [spec, endpoint]
                  bucket_name: [spec, bucketName]
          - groupVersionKind:
              group: source.toolkit.fluxcd.io
              version: v1beta2
              kind: HelmRepository
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, "[type=Ready]", status]
                  suspended: [spec, suspend]
                  revision: [status, artifact, revision]
                  url: [spec, url]
          - groupVersionKind:
              group: source.toolkit.fluxcd.io
              version: v1beta2
              kind: HelmChart
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, "[type=Ready]", status]
                  suspended: [spec, suspend]
                  revision: [status, artifact, revision]
                  chart_name: [spec, chart]
                  chart_version: [spec, version]
          - groupVersionKind:
              group: source.toolkit.fluxcd.io
              version: v1beta2
              kind: OCIRepository
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, "[type=Ready]", status]
                  suspended: [spec, suspend]
                  revision: [status, artifact, revision]
                  url: [spec, url]
          # --- notification.toolkit.fluxcd.io ---
          # Alert and Provider export no `ready` label, only `suspended`.
          - groupVersionKind:
              group: notification.toolkit.fluxcd.io
              version: v1beta3
              kind: Alert
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  suspended: [spec, suspend]
          - groupVersionKind:
              group: notification.toolkit.fluxcd.io
              version: v1beta3
              kind: Provider
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  suspended: [spec, suspend]
          - groupVersionKind:
              group: notification.toolkit.fluxcd.io
              version: v1
              kind: Receiver
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, "[type=Ready]", status]
                  suspended: [spec, suspend]
                  webhook_path: [status, webhookPath]
          # --- image.toolkit.fluxcd.io ---
          - groupVersionKind:
              group: image.toolkit.fluxcd.io
              version: v1beta2
              kind: ImageRepository
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, "[type=Ready]", status]
                  suspended: [spec, suspend]
                  image: [spec, image]
          - groupVersionKind:
              group: image.toolkit.fluxcd.io
              version: v1beta2
              kind: ImagePolicy
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, "[type=Ready]", status]
                  suspended: [spec, suspend]
                  source_name: [spec, imageRepositoryRef, name]
          - groupVersionKind:
              group: image.toolkit.fluxcd.io
              version: v1beta1
              kind: ImageUpdateAutomation
            metricNamePrefix: gotk
            metrics:
              - name: "resource_info"
                help: "The current state of a GitOps Toolkit resource."
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, "[type=Ready]", status]
                  suspended: [spec, suspend]
                  source_name: [spec, sourceRef, name]

View file

@ -1,4 +1,4 @@
{{- range $path, $bytes := .Files.Glob "grafana_dashboards/*.json" }}
{{- range $path, $bytes := .Files.Glob "files/grafana_dashboards/*.json" }}
---
apiVersion: v1
kind: ConfigMap

View file

@ -0,0 +1,8 @@
{{- /*
ConfigMap wrapping files/kube-state-metrics-config.yaml so it can be referenced
by name and key from elsewhere. The namespace comes from
.Values.prometheus.kubeStateMetricsConfig.namespace and falls back to the
release namespace when that value is empty.
*/ -}}
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluxcd-kube-state-metrics-config
  namespace: "{{ default .Release.Namespace .Values.prometheus.kubeStateMetricsConfig.namespace }}"
data:
  {{- (.Files.Glob "files/kube-state-metrics-config.yaml").AsConfig | nindent 2 }}

View file

@ -1,5 +1,6 @@
{{- if (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") }}
---
{{- if (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") }}
{{- $filter := "endpoint,instance,container,pod,namespace,job,service,chart_version,ready,customresource_group,customresource_version" }}
apiVersion: "monitoring.coreos.com/v1"
kind: "PrometheusRule"
metadata:
@ -11,7 +12,31 @@ spec:
- name: "FluxCD"
rules:
- alert: "FluxCD: source fetch failed"
expr: 'gotk_reconcile_condition{status="False",kind=~"GitRepository|HelmRepository|Bucket"} > 0'
expr: 'sum(gotk_resource_info{customresource_group="source.toolkit.fluxcd.io",ready!="True",ready=~".+"}) without ({{ $filter }}) > 0'
for: "5m"
labels:
severity: "warning"
# Annotations are wrapped in a raw string so Helm leaves the $labels
# placeholders for Prometheus to expand. The kind is read from
# customresource_kind: gotk_resource_info series carry no plain kind label.
{{`
annotations:
summary: "FluxCD has not fetched a source in {{ $labels.exported_namespace }} correct"
description: "FluxCD has not fetched the source {{ $labels.name }} of {{ $labels.customresource_kind }} in {{ $labels.exported_namespace }}"
`}}
# Fires when a Flux resource outside the source group reports a Ready status
# other than "True" for five minutes.
# The ready=~".+" matcher restricts the rule to series that actually carry a
# ready label; ready!="True" on its own also matches series without one (the
# Alert and Provider kinds export none), which would fire permanently.
- alert: "FluxCD: install failed"
expr: 'sum(gotk_resource_info{customresource_group!="source.toolkit.fluxcd.io",ready!="True",ready=~".+"}) without ({{ $filter }}) > 0'
for: "5m"
labels:
severity: "warning"
# Raw string: Helm must leave the $labels placeholders for Prometheus.
# The kind is read from customresource_kind, the label present on
# gotk_resource_info series.
{{`
annotations:
summary: "FluxCD has not installed something in {{ $labels.exported_namespace }} correct"
description: "FluxCD has not installed {{ $labels.name }} of {{ $labels.customresource_kind }} in {{ $labels.exported_namespace }} correct"
`}}
- name: "FluxCD - legacy"
rules:
- alert: "FluxCD: source fetch failed"
expr: 'gotk_reconcile_condition{status="False",kind=~"GitRepository|HelmRepository|HelmChart|Bucket"} > 0'
for: "5m"
labels:
severity: "warning"

View file

@ -13,6 +13,10 @@ commons:
rules:
labels: {}
prometheus:
kubeStateMetricsConfig:
# Namespace for the fluxcd-kube-state-metrics-config ConfigMap. Left empty by
# default, in which case the ConfigMap template uses the release namespace.
namespace:
grafana:
dashboards:
annotations: {}

View file

@ -17,8 +17,22 @@ spec:
upgrade:
{{- toYaml .Values.commons.helm.release.upgrade | nindent 4 }}
driftDetection:
{{- toYaml .Values.commons.helm.release.driftDetection | nindent 4 }}
{{- with .Values.commons.helm.release.driftDetection.mode }}
mode: {{ . }}
{{- end }}
ignore:
- target:
kind: PrometheusRule
paths:
- "/metadata/annotations/prometheus-operator-validated"
{{- with .Values.commons.helm.release.driftDetection.ignore }}
{{- toYaml . | nindent 6 }}
{{- end }}
interval: 10m
{{- with .Values.prometheus.valuesFrom }}
valuesFrom:
{{- toYaml . | nindent 4 }}
{{- end }}
values:
commonLabels:
prometheus: "default"