flux-charts/infra-certificates/templates/prometheus-rule.yaml

55 lines
2.3 KiB
YAML

{{- /*
  Alerting rules for cert-manager, packaged as a Prometheus Operator
  PrometheusRule. The manifest is rendered only when the target cluster
  reports the monitoring.coreos.com/v1 PrometheusRule kind, so the chart
  still installs on clusters where that CRD is absent.

  The alert summaries contain Prometheus template expressions. They are
  wrapped in Go raw strings (backticks) so that Helm emits them verbatim
  instead of trying to evaluate them at chart render time.
*/ -}}
{{- if (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") }}
{{- /* Labels removed from every aggregation below through "without (...)". */}}
{{- $without := "instance,endpoint,container,pod,service,job,namespace" }}
---
apiVersion: "monitoring.coreos.com/v1"
kind: "PrometheusRule"
metadata:
  name: "cert-manager"
  labels:
    {{- toYaml .Values.commons.prometheus.rules.labels | nindent 4 }}
spec:
  groups:
    - name: "CertManager"
      rules:
        {{- /*
          Critical: the smallest remaining lifetime (expiration timestamp
          minus the current time) is below 86400 seconds, i.e. one day.
        */}}
        - alert: "CertificateAboutToExpire"
          expr: '(min(certmanager_certificate_expiration_timestamp_seconds - time()) without ({{ $without }}) < 86400)'
          for: "1m"
          labels:
            severity: "critical"
            detectedBy: "CertManager"
          annotations:
            summary: {{ `"SSL certificate {{ $labels.name }} in namespace {{ $labels.exported_namespace }} by {{ $labels.issuer_kind }} {{ $labels.issuer_name }} will expire in {{ $value | humanizeDuration }}"` }}
        {{- /*
          Warning: same expression with a threshold of 86400 * 6 seconds
          (six days). Any series matching the critical rule above also
          matches this one, because the critical threshold is smaller.
        */}}
        - alert: "CertificateAboutToExpire"
          expr: '(min(certmanager_certificate_expiration_timestamp_seconds - time()) without ({{ $without }}) < 86400 * 6)'
          for: "1m"
          labels:
            severity: "warning"
            detectedBy: "CertManager"
          annotations:
            summary: {{ `"SSL certificate {{ $labels.name }} in namespace {{ $labels.exported_namespace }} by {{ $labels.issuer_kind }} {{ $labels.issuer_name }} will expire in {{ $value | humanizeDuration }}."` }}
        {{- /*
          Critical: the ready-status series whose condition is anything
          other than "True" sum to more than zero. The condition label is
          dropped in the aggregation together with the shared label list.
          NOTE(review): this alert name and the next one contain a space,
          and neither rule sets the detectedBy label used by the expiry
          alerts above - confirm both are intentional before changing them,
          since alert routing may match on the current values.
        */}}
        - alert: "CertManager CertificateReady"
          expr: '(sum(certmanager_certificate_ready_status{condition!="True"}) without ({{ $without }}, condition) > 0)'
          for: "1m"
          labels:
            severity: "critical"
          annotations:
            summary: {{ `"Certificate {{ $labels.name }} in namespace {{ $labels.exported_namespace }} is not ready by {{ $labels.issuer_kind }} {{ $labels.issuer_name }}"` }}
        {{- /*
          Critical: the 5-minute rate of ACME client requests that were
          answered with status 429 is above zero.
        */}}
        - alert: "CertManager HittingRateLimits"
          expr: '(sum (rate(certmanager_http_acme_client_request_count{status="429"}[5m])) without ({{ $without }}) > 0)'
          for: "1m"
          labels:
            severity: "critical"
          annotations:
            summary: {{ `"Cert manager hitting rate limits for {{ $labels.host }}"` }}
{{- end }}{{/* end-if */}}