fix(infra-fluxcd): new metrics
This commit is contained in:
parent
b5d3256183
commit
706d2ca181
11 changed files with 411 additions and 65 deletions
|
@ -17,6 +17,9 @@ components:
|
|||
name: "flux-system"
|
||||
skip_create: true
|
||||
values:
|
||||
prometheus:
|
||||
kubeStateMetricsConfig:
|
||||
namespace: "monitoring"
|
||||
grafana:
|
||||
dashboards:
|
||||
annotations:
|
||||
|
@ -46,6 +49,12 @@ components:
|
|||
enabled: true
|
||||
namespace:
|
||||
name: "monitoring"
|
||||
values:
|
||||
prometheus:
|
||||
valuesFrom:
|
||||
- kind: ConfigMap
|
||||
name: fluxcd-kube-state-metrics-config
|
||||
valuesKey: kube-state-metrics-config.yaml
|
||||
|
||||
infra-trivy:
|
||||
enabled: true
|
||||
|
|
|
@ -50,6 +50,11 @@ image::https://img.shields.io/badge/Version-application-informational?style=flat
|
|||
| int
|
||||
| `0`
|
||||
|
|
||||
|
||||
| prometheus.kubeStateMetricsConfig.namespace
|
||||
| string
|
||||
| `nil`
|
||||
|
|
||||
|===
|
||||
|
||||
Autogenerated from chart metadata using https://github.com/norwoodj/helm-docs[helm-docs]
|
||||
|
|
|
@ -1437,51 +1437,86 @@
|
|||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": true,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": {
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"decimals": 2,
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 100,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "stepAfter",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "opm"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 70
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": false,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.0.3",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": true,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -1504,37 +1539,8 @@
|
|||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeRegions": [],
|
||||
"title": "Helm Releases ops/min",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:1102",
|
||||
"format": "opm",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:1103",
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false
|
||||
}
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
275
infra-fluxcd/files/kube-state-metrics-config.yaml
Normal file
275
infra-fluxcd/files/kube-state-metrics-config.yaml
Normal file
|
@ -0,0 +1,275 @@
|
|||
# Values overlay for the kube-state-metrics subchart: exposes every Flux
# (GitOps Toolkit) custom resource as a `gotk_resource_info` info metric.
kube-state-metrics:
  # If kube-prometheus-stack is already installed with its own custom
  # collectors, comment out `collectors` and `extraArgs` below to keep the
  # existing kube-state-metrics configuration.
  collectors: []
  extraArgs:
    - --custom-resource-state-only=true
  rbac:
    # list/watch access on all Flux resources referenced further down.
    extraRules:
      - apiGroups:
          - source.toolkit.fluxcd.io
          - kustomize.toolkit.fluxcd.io
          - helm.toolkit.fluxcd.io
          - notification.toolkit.fluxcd.io
          - image.toolkit.fluxcd.io
        resources:
          - gitrepositories
          - buckets
          - helmrepositories
          - helmcharts
          - ocirepositories
          - kustomizations
          - helmreleases
          - alerts
          - providers
          - receivers
          - imagerepositories
          - imagepolicies
          - imageupdateautomations
        verbs:
          - list
          - watch
  customResourceState:
    enabled: true
    config:
      spec:
        # One entry per Flux kind; all share the `gotk` prefix and the
        # `resource_info` metric name, differing only in the exported labels.
        resources:
          # --- kustomize.toolkit.fluxcd.io ---
          - groupVersionKind:
              group: kustomize.toolkit.fluxcd.io
              version: v1
              kind: Kustomization
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, '[type=Ready]', status]
                  suspended: [spec, suspend]
                  revision: [status, lastAppliedRevision]
                  source_name: [spec, sourceRef, name]
          # --- helm.toolkit.fluxcd.io ---
          - groupVersionKind:
              group: helm.toolkit.fluxcd.io
              version: v2beta2
              kind: HelmRelease
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, '[type=Ready]', status]
                  suspended: [spec, suspend]
                  revision: [status, lastAppliedRevision]
                  chart_name: [spec, chart, spec, chart]
                  chart_source_name: [spec, chart, spec, sourceRef, name]
          # --- source.toolkit.fluxcd.io ---
          - groupVersionKind:
              group: source.toolkit.fluxcd.io
              version: v1
              kind: GitRepository
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, '[type=Ready]', status]
                  suspended: [spec, suspend]
                  revision: [status, artifact, revision]
                  url: [spec, url]
          - groupVersionKind:
              group: source.toolkit.fluxcd.io
              version: v1beta2
              kind: Bucket
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, '[type=Ready]', status]
                  suspended: [spec, suspend]
                  revision: [status, artifact, revision]
                  endpoint: [spec, endpoint]
                  bucket_name: [spec, bucketName]
          - groupVersionKind:
              group: source.toolkit.fluxcd.io
              version: v1beta2
              kind: HelmRepository
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, '[type=Ready]', status]
                  suspended: [spec, suspend]
                  revision: [status, artifact, revision]
                  url: [spec, url]
          - groupVersionKind:
              group: source.toolkit.fluxcd.io
              version: v1beta2
              kind: HelmChart
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, '[type=Ready]', status]
                  suspended: [spec, suspend]
                  revision: [status, artifact, revision]
                  chart_name: [spec, chart]
                  chart_version: [spec, version]
          - groupVersionKind:
              group: source.toolkit.fluxcd.io
              version: v1beta2
              kind: OCIRepository
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, '[type=Ready]', status]
                  suspended: [spec, suspend]
                  revision: [status, artifact, revision]
                  url: [spec, url]
          # --- notification.toolkit.fluxcd.io ---
          # Alert and Provider export no `ready` label, only `suspended`.
          - groupVersionKind:
              group: notification.toolkit.fluxcd.io
              version: v1beta3
              kind: Alert
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  suspended: [spec, suspend]
          - groupVersionKind:
              group: notification.toolkit.fluxcd.io
              version: v1beta3
              kind: Provider
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  suspended: [spec, suspend]
          - groupVersionKind:
              group: notification.toolkit.fluxcd.io
              version: v1
              kind: Receiver
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, '[type=Ready]', status]
                  suspended: [spec, suspend]
                  webhook_path: [status, webhookPath]
          # --- image.toolkit.fluxcd.io ---
          - groupVersionKind:
              group: image.toolkit.fluxcd.io
              version: v1beta2
              kind: ImageRepository
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, '[type=Ready]', status]
                  suspended: [spec, suspend]
                  image: [spec, image]
          - groupVersionKind:
              group: image.toolkit.fluxcd.io
              version: v1beta2
              kind: ImagePolicy
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, '[type=Ready]', status]
                  suspended: [spec, suspend]
                  source_name: [spec, imageRepositoryRef, name]
          - groupVersionKind:
              group: image.toolkit.fluxcd.io
              version: v1beta1
              kind: ImageUpdateAutomation
            metricNamePrefix: gotk
            metrics:
              - name: resource_info
                help: 'The current state of a GitOps Toolkit resource.'
                each:
                  type: Info
                  info:
                    labelsFromPath:
                      name: [metadata, name]
                labelsFromPath:
                  exported_namespace: [metadata, namespace]
                  ready: [status, conditions, '[type=Ready]', status]
                  suspended: [spec, suspend]
                  source_name: [spec, sourceRef, name]
|
|
@ -1,4 +1,4 @@
|
|||
{{- range $path, $bytes := .Files.Glob "grafana_dashboards/*.json" }}
|
||||
{{- range $path, $bytes := .Files.Glob "files/grafana_dashboards/*.json" }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
---
# ConfigMap that ships files/kube-state-metrics-config.yaml.
# It lands in prometheus.kubeStateMetricsConfig.namespace when that value is
# set, otherwise in the release namespace.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: "{{ default .Release.Namespace .Values.prometheus.kubeStateMetricsConfig.namespace }}"
  name: fluxcd-kube-state-metrics-config
data:
  {{- (.Files.Glob "files/kube-state-metrics-config.yaml").AsConfig | nindent 2 }}
|
|
@ -1,5 +1,6 @@
|
|||
{{- if (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") }}
|
||||
---
|
||||
{{- if (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") }}
|
||||
{{- $filter := "endpoint,instance,container,pod,namespace,job,service,chart_version,ready,customresource_group,customresource_version" }}
|
||||
apiVersion: "monitoring.coreos.com/v1"
|
||||
kind: "PrometheusRule"
|
||||
metadata:
|
||||
|
@ -11,7 +12,31 @@ spec:
|
|||
- name: "FluxCD"
|
||||
rules:
|
||||
# Fires when a Flux source object (source.toolkit.fluxcd.io) reports a Ready
# condition other than "True" for 5 minutes.
# `ready=~".+"` skips series that carry no ready label at all.
# Only the gotk_resource_info expression is kept; the legacy
# gotk_reconcile_condition expression belongs to the "FluxCD - legacy" group.
- alert: "FluxCD: source fetch failed"
  expr: 'sum(gotk_resource_info{customresource_group="source.toolkit.fluxcd.io",ready!="True",ready=~".+"}) without ({{ $filter }}) > 0'
  for: "5m"
  labels:
    severity: "warning"
  # The annotations sit in a Helm raw string so the label placeholders reach
  # Prometheus unrendered. The kind is read from customresource_kind, the
  # label kube-state-metrics attaches to custom-resource metrics.
  {{`
  annotations:
    summary: "FluxCD has not fetched a source in {{ $labels.exported_namespace }} correct"
    description: "FluxCD has not fetched the source {{ $labels.name }} of {{ $labels.customresource_kind }} in {{ $labels.exported_namespace }}"
  `}}
|
||||
|
||||
# Fires when a non-source Flux object (Kustomization, HelmRelease, ...)
# reports a Ready condition other than "True" for 5 minutes.
# `ready=~".+"` is required: a bare `ready!="True"` also matches series with
# no ready label, and the Alert and Provider metrics export none, so without
# it every Alert and Provider object would trip this rule permanently.
- alert: "FluxCD: install failed"
  expr: 'sum(gotk_resource_info{customresource_group!="source.toolkit.fluxcd.io",ready!="True",ready=~".+"}) without ({{ $filter }}) > 0'
  for: "5m"
  labels:
    severity: "warning"
  # The annotations sit in a Helm raw string so the label placeholders reach
  # Prometheus unrendered. The kind is read from customresource_kind, the
  # label kube-state-metrics attaches to custom-resource metrics.
  {{`
  annotations:
    summary: "FluxCD has not installed something in {{ $labels.exported_namespace }} correct"
    description: "FluxCD has not installed {{ $labels.name }} of {{ $labels.customresource_kind }} in {{ $labels.exported_namespace }} correct"
  `}}
|
||||
|
||||
- name: "FluxCD - legacy"
|
||||
rules:
|
||||
- alert: "FluxCD: source fetch failed"
|
||||
expr: 'gotk_reconcile_condition{status="False",kind=~"GitRepository|HelmRepository|HelmChart|Bucket"} > 0'
|
||||
for: "5m"
|
||||
labels:
|
||||
severity: "warning"
|
||||
|
|
|
@ -13,6 +13,10 @@ commons:
|
|||
rules:
|
||||
labels: {}
|
||||
|
||||
prometheus:
|
||||
kubeStateMetricsConfig:
|
||||
namespace:
|
||||
|
||||
grafana:
|
||||
dashboards:
|
||||
annotations: {}
|
||||
|
|
|
@ -17,8 +17,22 @@ spec:
|
|||
upgrade:
|
||||
{{- toYaml .Values.commons.helm.release.upgrade | nindent 4 }}
|
||||
driftDetection:
|
||||
{{- toYaml .Values.commons.helm.release.driftDetection | nindent 4 }}
|
||||
{{- with .Values.commons.helm.release.driftDetection.mode }}
|
||||
mode: {{ . }}
|
||||
{{- end }}
|
||||
ignore:
|
||||
- target:
|
||||
kind: PrometheusRule
|
||||
paths:
|
||||
- "/metadata/annotations/prometheus-operator-validated"
|
||||
{{- with .Values.commons.helm.release.driftDetection.ignore }}
|
||||
{{- toYaml . | nindent 6 }}
|
||||
{{- end }}
|
||||
interval: 10m
|
||||
{{- with .Values.prometheus.valuesFrom }}
|
||||
valuesFrom:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
values:
|
||||
commonLabels:
|
||||
prometheus: "default"
|
||||
|
|
Loading…
Add table
Reference in a new issue