diff --git a/infra-certificates/grafana_dashboards/README.adoc b/infra-certificates/grafana_dashboards/README.adoc new file mode 100644 index 0000000..75d25a1 --- /dev/null +++ b/infra-certificates/grafana_dashboards/README.adoc @@ -0,0 +1,4 @@ += Cert-Manager + +Dashboard downloaded from: +https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/eae22f642aaa5d422e4766f6811df2158fc05539/dashboards/cert-manager.json diff --git a/infra-certificates/grafana_dashboards/certmanager.json b/infra-certificates/grafana_dashboards/certmanager.json new file mode 100644 index 0000000..bc7b28e --- /dev/null +++ b/infra-certificates/grafana_dashboards/certmanager.json @@ -0,0 +1,1203 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "", + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "id": 59, + "iteration": 1616445892702, + "links": [], + "panels": [ + { + "datasource": "$datasource", + "description": "The number of certificates in the ready state.", + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "True" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.4.5", + "targets": [ + { + "expr": "sum by (condition) (certmanager_certificate_ready_status)", + "interval": "", + "legendFormat": "{{condition}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Certificates Ready", + "type": "stat" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "#EAB839", + "value": 604800 + }, + { + "color": "green", + "value": 1209600 + } + ] + }, + "unit": "dtdurations" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.4.5", + "targets": [ + { + "expr": "min(certmanager_certificate_expiration_timestamp_seconds > 0) - time()", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "expr": "vector(1250000)", + "hide": true, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Soonest Cert Expiry", + "type": "stat" + }, + { + "datasource": "$datasource", + "description": "Status of the certificates. Values are True, False or Unknown.", + "fieldConfig": { + "defaults": { + "custom": { + "align": null, + "filterable": false + }, + "mappings": [ + { + "from": "", + "id": 0, + "operator": "", + "text": "Yes", + "to": "", + "type": 1, + "value": "" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Ready Status" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Valid Until" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Valid Until" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeAsIso" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 9, + "options": { + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Valid Until" + } + ] + }, + "pluginVersion": "7.4.5", + "targets": [ + { + "expr": "label_join(avg by (name, namespace, condition, exported_namespace) (certmanager_certificate_ready_status == 1), \"namespaced_name\", \"-\", \"namespace\", \"exported_namespace\", \"name\")", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "expr": "label_join(avg by (name, namespace, exported_namespace) (certmanager_certificate_expiration_timestamp_seconds) * 1000, \"namespaced_name\", \"-\", \"namespace\", \"exported_namespace\", \"name\")", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Certificates", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "namespaced_name" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Value #A": true, + "exported_namespace": false, + "exported_namespace 1": false, + "exported_namespace 2": true, + "name 1": true, + "namespaced_name": true, + "namespace 2": true + }, + "indexByName": { + "Time 1": 8, + "Time 2": 10, + "Value #A": 6, + "Value #B": 5, + "condition": 4, + "exported_namespace 1": 1, + "exported_namespace 2": 11, + "name 1": 9, + "name 2": 3, + "namespace": 0, + "namespaced_name": 7, + "namespace 1": 2 + }, + "renameByName": { + "Time 1": "", + "Value #B": "Valid Until", + "condition": "Ready Status", + "exported_namespace": "Certificate Namespace", + "exported_namespace 1": "Certificate Namespace", + "exported_namespace 2": "", + "name": "Certificate", + "name 2": "Certificate", + "namespace": "Namespace", + "namespaced_name": "", + "namespace 1": "Namespace" + } + } + } + ], + "type": "table" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "The rate of controller sync requests.", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "hiddenSeries": false, + "id": 7, + "interval": "20s", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "maxDataPoints": 250, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (controller) (\n rate(certmanager_controller_sync_call_count[$__rate_interval])\n)", + "interval": "", + "legendFormat": "{{controller}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Controller Sync Requests/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Rate of requests to ACME provider.", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 6, + "interval": "20s", + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "maxDataPoints": 250, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (method, path, status) (\n rate(certmanager_http_acme_client_request_count[$__rate_interval])\n)", + "interval": "", + "legendFormat": "{{method}} {{path}} {{status}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ACME HTTP Requests/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Average duration of requests to ACME provider. ", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 10, + "interval": "30s", + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "maxDataPoints": 250, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (method, path, status) (rate(certmanager_http_acme_client_request_duration_seconds_sum[$__rate_interval]))\n/\nsum by (method, path, status) (rate(certmanager_http_acme_client_request_duration_seconds_count[$__rate_interval]))", + "interval": "", + "legendFormat": "{{method}} {{path}} {{status}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ACME HTTP Request avg duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max": "dark-yellow" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "CPU Usage and limits, as percent of a vCPU core. ", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 24 + }, + "hiddenSeries": false, + "id": 12, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": 250, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "CPU", + "fill": 1, + "fillGradient": 5 + }, + { + "alias": "/Request.*/", + "color": "#FF9830", + "dashes": true + }, + { + "alias": "/Limit.*/", + "color": "#F2495C", + "dashes": true + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (pod) (rate(container_cpu_usage_seconds_total{container=\"cert-manager\"}[$__rate_interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "CPU {{pod}}", + "refId": "A" + }, + { + "expr": "avg by (pod) (kube_pod_container_resource_limits_cpu_cores{container=\"cert-manager\"})", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Limit {{pod}}", + "refId": "B" + }, + { + "expr": "avg by (pod) (kube_pod_container_resource_requests_cpu_cores{container=\"cert-manager\"})", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Request {{pod}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max": "dark-yellow" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Percent of the time that the CPU is being throttled. Higher is badderer. ", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 24 + }, + "hiddenSeries": false, + "id": 14, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": 250, + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/external-dns.*/", + "fill": 1, + "fillGradient": 5 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (pod) (\n rate(container_cpu_cfs_throttled_periods_total{container=\"cert-manager\"}[$__rate_interval])\n /\n rate(container_cpu_cfs_periods_total{container=\"cert-manager\"}[$__rate_interval])\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Throttling", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max": "dark-yellow" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Memory utilisation and limits.", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 24 + }, + "hiddenSeries": false, + "id": 16, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": 250, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Memory", + "fill": 1, + "fillGradient": 5 + }, + { + "alias": "Request", + "color": "#FF9830", + "dashes": true + }, + { + "alias": "Limit", + "color": "#F2495C", + "dashes": true + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg by (pod) (container_memory_usage_bytes{container=\"cert-manager\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Memory {{pod}}", + "refId": "A" + }, + { + "expr": "avg by (pod) (kube_pod_container_resource_limits_memory_bytes{container=\"cert-manager\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Limit {{pod}}", + "refId": "B" + }, + { + "expr": "avg by (pod) (kube_pod_container_resource_requests_memory_bytes{container=\"cert-manager\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Request {{pod}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max": "dark-yellow" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Network ingress/egress.", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 5, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 24 + }, + "hiddenSeries": false, + "id": 18, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "transmit", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(\n sum without (interface) (\n rate(container_network_receive_bytes_total{namespace=\"cert-manager\"}[$__rate_interval])\n )\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "receive", + "refId": "A" + }, + { + "expr": "avg(\n sum without (interface) (\n rate(container_network_transmit_bytes_total{namespace=\"cert-manager\"}[$__rate_interval])\n )\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "transmit", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "1m", + "schemaVersion": 27, + "style": "dark", + "tags": [ + "cert-manager", + "infra" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "prometheus", + "value": "prometheus" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Cert Manager", + "uid": "TvuRo2iMk", + "version": 1 +} diff --git a/infra-certificates/templates/configmap_grafana_dashboards.yaml b/infra-certificates/templates/configmap_grafana_dashboards.yaml new file mode 100644 index 0000000..e2f3c9d --- /dev/null +++ b/infra-certificates/templates/configmap_grafana_dashboards.yaml @@ -0,0 +1,15 @@ +{{- range $path, $bytes := $.Files.Glob "grafana_dashboards/*.json" }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboards-{{ base $path }} + labels: + {{- toYaml $.Values.commons.grafana.dashboards.labels | nindent 4 }} + {{- with $.Values.commons.grafana.dashboards.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +data: + {{- ($.Files.Glob $path ).AsConfig | nindent 2 }} +{{- end }} diff --git a/infra-certificates/templates/configmap_init_crd.yaml b/infra-certificates/templates/configmap_init_crd.yaml index 0795119..386fe0a 100644 --- a/infra-certificates/templates/configmap_init_crd.yaml +++ b/infra-certificates/templates/configmap_init_crd.yaml @@ -5,7 +5,10 @@ metadata: name: {{ .Release.Name }}-init namespace: "{{ .Values.init.namespace }}" data: - {{- $isMonitoring := (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }} + {{- $isMonitoring := and + (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") + (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") + }} monitoring: {{ $isMonitoring | quote }} {{- $isCertManager := (.Capabilities.APIVersions.Has "cert-manager.io/v1/ClusterIssuer") }} certmanager: {{ $isCertManager | quote }} diff --git a/infra-certificates/templates/prometheus-rule.yaml b/infra-certificates/templates/prometheus-rule.yaml new file mode 100644 index 0000000..76eedc9 --- /dev/null +++ b/infra-certificates/templates/prometheus-rule.yaml @@ -0,0 +1,55 @@ +{{- if (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") }} +{{- $without := "instance,endpoint,container,pod,service,job,namespace" }} +--- +apiVersion: "monitoring.coreos.com/v1" +kind: "PrometheusRule" +metadata: + name: "cert-manager" + labels: + {{- toYaml .Values.commons.prometheus.rules.labels | nindent 4 }} +spec: + groups: + - name: "CertManager" + rules: + - alert: "CertificateAboutToExpire" + expr: '(min(certmanager_certificate_expiration_timestamp_seconds - time()) without ({{ $without }}) < 86400)' + for: "1m" + labels: + severity: "critical" + detectedBy: "CertManager" + {{` + annotations: + summary: "SSL certificate {{ $labels.name }} in namespace {{ $labels.exported_namespace }} by {{ $labels.issuer_kind }} {{ $labels.issuer_name }} will expire in {{ $value | humanizeDuration }}" + `}} + + - alert: "CertificateAboutToExpire" + expr: '(min(certmanager_certificate_expiration_timestamp_seconds - time()) without ({{ $without }}) < 86400 * 6)' + for: "1m" + labels: + severity: "warning" + detectedBy: "CertManager" + {{` + annotations: + summary: "SSL certificate {{ $labels.name }} in namespace {{ $labels.exported_namespace }} by {{ $labels.issuer_kind }} {{ $labels.issuer_name }} will expire in {{ $value | humanizeDuration }}." + `}} + + - alert: "CertManager CertificateReady" + expr: '(sum(certmanager_certificate_ready_status{condition!="True"}) without ({{ $without }}, condition) > 0)' + for: "1m" + labels: + severity: "critical" + {{` + annotations: + summary: "Certificate {{ $labels.name }} in namespace {{ $labels.exported_namespace }} is not ready by {{ $labels.issuer_kind }} {{ $labels.issuer_name }}" + `}} + + - alert: "CertManager HittingRateLimits" + expr: '(sum (rate(certmanager_http_acme_client_request_count{status="429"}[5m])) without ({{ $without }}) > 0)' + for: "1m" + labels: + severity: "critical" + {{` + annotations: + summary: "Cert manager hitting rate limits for {{ $labels.host }}" + `}} +{{- end }}{{/* end-if */}} diff --git a/infra-certificates/values.yaml b/infra-certificates/values.yaml index f3c4202..e5cdaf8 100644 --- a/infra-certificates/values.yaml +++ b/infra-certificates/values.yaml @@ -13,5 +13,15 @@ commons: prometheus: monitor: labels: {} + rules: + labels: {} + + grafana: + datasource: + labels: + grafana_datasource: "1" + dashboards: + labels: + grafana_dashboard: "1" email: "an@example.org"