From a27e6c489e9430b97c984f9ba85cda29f9e212be Mon Sep 17 00:00:00 2001 From: WrenIX Date: Mon, 27 Nov 2023 09:44:32 +0100 Subject: [PATCH] fix(infra-monitoring): init --- base-values/commons.yaml | 6 +- base-values/infra.yaml | 14 ++ infra-monitoring/.helmignore | 23 ++ infra-monitoring/Chart.yaml | 6 + .../alertmanager/alertmanager-config.yaml | 118 ++++++++++ .../alertmanager/authentik-application.yaml | 59 +++++ .../alertmanager/matrix/release.yaml | 64 +++++ .../templates/alertmanager/matrix/secret.yaml | 11 + .../templates/alertmanager/ntfy/release.yaml | 74 ++++++ .../templates/alertmanager/ntfy/secret.yaml | 12 + .../templates/configmap_init_crd.yaml | 15 ++ .../templates/exporter/blackbox/release.yaml | 183 +++++++++++++++ .../grafana/authentik-application.yaml | 70 ++++++ .../karma/authentik-application.yaml | 60 +++++ infra-monitoring/templates/karma/release.yaml | 138 +++++++++++ infra-monitoring/templates/karma/repo.yaml | 8 + .../kube-prometheus-stack/release.yaml | 222 ++++++++++++++++++ .../prometheus/authentik-application.yaml | 59 +++++ infra-monitoring/templates/repo.yaml | 8 + infra-monitoring/values.yaml | 181 ++++++++++++++ 20 files changed, 1330 insertions(+), 1 deletion(-) create mode 100644 infra-monitoring/.helmignore create mode 100644 infra-monitoring/Chart.yaml create mode 100644 infra-monitoring/templates/alertmanager/alertmanager-config.yaml create mode 100644 infra-monitoring/templates/alertmanager/authentik-application.yaml create mode 100644 infra-monitoring/templates/alertmanager/matrix/release.yaml create mode 100644 infra-monitoring/templates/alertmanager/matrix/secret.yaml create mode 100644 infra-monitoring/templates/alertmanager/ntfy/release.yaml create mode 100644 infra-monitoring/templates/alertmanager/ntfy/secret.yaml create mode 100644 infra-monitoring/templates/configmap_init_crd.yaml create mode 100644 infra-monitoring/templates/exporter/blackbox/release.yaml create mode 100644 
infra-monitoring/templates/grafana/authentik-application.yaml create mode 100644 infra-monitoring/templates/karma/authentik-application.yaml create mode 100644 infra-monitoring/templates/karma/release.yaml create mode 100644 infra-monitoring/templates/karma/repo.yaml create mode 100644 infra-monitoring/templates/kube-prometheus-stack/release.yaml create mode 100644 infra-monitoring/templates/prometheus/authentik-application.yaml create mode 100644 infra-monitoring/templates/repo.yaml create mode 100644 infra-monitoring/values.yaml diff --git a/base-values/commons.yaml b/base-values/commons.yaml index 68f582d..4429c5a 100644 --- a/base-values/commons.yaml +++ b/base-values/commons.yaml @@ -1,5 +1,4 @@ commons: - helm: release: install: @@ -15,6 +14,11 @@ commons: domain: "wrenix.eu" annotations: cert-manager.io/cluster-issuer: letsencrypt-prod + tls: + # -- tls on every ingress + enabled: true + # -- use own definition of tls (e.g. for own or wildcard certificate) + override: grafana: datasource: diff --git a/base-values/infra.yaml b/base-values/infra.yaml index db392d4..6f30561 100644 --- a/base-values/infra.yaml +++ b/base-values/infra.yaml @@ -1,3 +1,13 @@ +commons: + masterPassword: + auth: + enabled: false + type: "authentik" + namespace: "" + authentik: + domain: "" + + components: infra-fluxcd: enabled: true @@ -26,6 +36,10 @@ components: annotations: grafana.mon.local/dashboard-folder: "Logging" + infra-monitoring: + enabled: true + namespace: "monitoring" + infra-trivy: enabled: true namespace: "monitoring-trivy" diff --git a/infra-monitoring/.helmignore b/infra-monitoring/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/infra-monitoring/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/infra-monitoring/Chart.yaml b/infra-monitoring/Chart.yaml new file mode 100644 index 0000000..cac3819 --- /dev/null +++ b/infra-monitoring/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: infra-monitoring +description: Install all monitoring related + +type: application +version: 0.1.0 diff --git a/infra-monitoring/templates/alertmanager/alertmanager-config.yaml b/infra-monitoring/templates/alertmanager/alertmanager-config.yaml new file mode 100644 index 0000000..e594f63 --- /dev/null +++ b/infra-monitoring/templates/alertmanager/alertmanager-config.yaml @@ -0,0 +1,118 @@ +--- +apiVersion: "monitoring.coreos.com/v1alpha1" +kind: "AlertmanagerConfig" +metadata: + name: "global-alertmanager-conf" +spec: + receivers: + - name: "null" + {{- with .Values.alertmanager.receiver.ntfy }} + {{- if .enabled }} + - name: "ntfy-default" + webhookConfigs: + - url: "http://alertmanager-ntfy" + sendResolved: {{ .sendResolved }} + {{- if .config.user }}{{/* same condition as the global-alertmanager-ntfy-auth Secret: it is rendered whenever config.user is set, with a derived password if none is given */}} + httpConfig: + basicAuth: + username: + name: global-alertmanager-ntfy-auth + key: username + password: + name: global-alertmanager-ntfy-auth + key: password + {{- end }} + {{- end }} + {{- end }} + {{- with .Values.alertmanager.receiver.matrix }} + {{- if .enabled }} + - name: "matrix-default" + webhookConfigs: + - url: "http://alertmanager-matrix:4051/{{ .default }}" + sendResolved: {{ .sendResolved }} + {{- range $receiver, $conf := .rooms }} + - name: "matrix-{{ $receiver }}" + webhookConfigs: + - url: "http://alertmanager-matrix:4051/{{ $conf.room }}" + sendResolved: {{ $conf.sendResolved }} + {{- end }} + {{- end }} + {{- end }} + inhibitRules: + - sourceMatch: + - name: "alertmanagerInhibitDisable" + matchType: "!=" + value: "true" + - name: "severity" + matchType: "=" + value: "critical" 
+ targetMatch: + - name: "alertmanagerInhibitDisable" + matchType: "!=" + value: "true" + - name: "severity" + matchType: "=~" + value: "warning|info" + equal: + - "namespace" + - "alertname" + - sourceMatch: + - name: "alertmanagerInhibitDisable" + matchType: "!=" + value: "true" + - name: "severity" + matchType: "=" + value: "warning" + targetMatch: + - name: "alertmanagerInhibitDisable" + matchType: "!=" + value: "true" + - name: "severity" + matchType: "=" + value: "info" + equal: + - "namespace" + - "alertname" + - sourceMatch: + - name: "alertmanagerInhibitDisable" + matchType: "!=" + value: "true" + - name: "alertname" + matchType: "=" + value: "InfoInhibitor" + targetMatch: + - name: "alertmanagerInhibitDisable" + matchType: "!=" + value: "true" + - name: "severity" + matchType: "=" + value: "info" + equal: + - "namespace" + route: + groupWait: "30s" + groupInterval: "10m" + groupBy: + - "alertname" + receiver: "null" + repeatInterval: {{ .Values.alertmanager.receiver.repeatInterval }} + routes: + - receiver: "null" + matchers: + - name: "alertname" + matchType: "=~" + value: "InfoInhibitor|Watchdog" + {{- with .Values.alertmanager.receiver.routes }} + {{- toYaml . 
| nindent 6 }} + {{- else }} + {{- with .Values.alertmanager.receiver }} + {{- if .ntfy.enabled }} + - receiver: "ntfy-default" + continue: true + {{- end }} + {{- if .matrix.enabled }} + - receiver: "matrix-default" + continue: true + {{- end }} + {{- end }}{{/* end-with receiver */}} + {{- end }}{{/* end-with routes */}} diff --git a/infra-monitoring/templates/alertmanager/authentik-application.yaml b/infra-monitoring/templates/alertmanager/authentik-application.yaml new file mode 100644 index 0000000..0311a2f --- /dev/null +++ b/infra-monitoring/templates/alertmanager/authentik-application.yaml @@ -0,0 +1,59 @@ +{{- if and + .Values.commons.auth.enabled (eq .Values.commons.auth.type "authentik") + .Values.alertmanager.ingress.enabled +}} +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: authentik-application-alertmanager +spec: + chart: + spec: + sourceRef: + kind: GitRepository + name: "wrenix-helm-charts" + namespace: "flux-system" + chart: "./authentik-application" + reconcileStrategy: "Revision" + interval: 10m + releaseName: authentik-application-infra-alertmanager + targetNamespace: {{ .Values.commons.auth.namespace }} + install: + {{- toYaml .Values.commons.helm.release.install | nindent 4 }} + test: + {{- toYaml .Values.commons.helm.release.test | nindent 4 }} + upgrade: + {{- toYaml .Values.commons.helm.release.upgrade | nindent 4 }} + interval: 10m + values: + {{- $domain := .Values.alertmanager.ingress.host | default (printf "alertmanager.%s" .Values.commons.ingress.domain) }} + {{- $url := printf "https://%s" $domain }} + blueprint: + authentik: + domain: "https://{{ .Values.commons.auth.authentik.domain }}" + provider: + enabled: {{ not .Values.alertmanager.auth.anonymous.enabled }} + proxy: + externalHost: {{ $url | quote }} + skipPathRegex: | + /favicon.ico + ingress: + enabled: true + domain: {{ $domain | quote }} + + groups: + - slug: "app: infra" + bindID: "0e71f524-6fb5-43a5-9f60-95d4e103e390" + + 
application: + group: "Infrastructure" + policyEngineMode: "any" + openInNewTab: true + publisher: "WrenIX's Infra" + slug: "infra-alertmanager" + name: "Alertmanager" + launchURL: {{ $url | quote }} + icon: "{{ $url }}/favicon.ico" + description: "The Alertmanager handles alerts sent by client applications such as the Prometheus server. It takes care of deduplicating, grouping, and routing them to the correct receiver integration such as email, PagerDuty, or OpsGenie. It also takes care of silencing and inhibition of alerts." +{{- end }} diff --git a/infra-monitoring/templates/alertmanager/matrix/release.yaml b/infra-monitoring/templates/alertmanager/matrix/release.yaml new file mode 100644 index 0000000..836a050 --- /dev/null +++ b/infra-monitoring/templates/alertmanager/matrix/release.yaml @@ -0,0 +1,64 @@ +{{- with .Values.alertmanager.receiver.matrix }} +{{- if .enabled }} +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: "alertmanager-matrix" +spec: + chart: + spec: + sourceRef: + kind: "GitRepository" + name: "wrenix-helm-charts" + namespace: "flux-system" + chart: "./alertmanager-matrix" + reconcileStrategy: "Revision" + install: + {{- toYaml $.Values.commons.helm.release.install | nindent 4 }} + test: + {{- toYaml $.Values.commons.helm.release.test | nindent 4 }} + upgrade: + {{- toYaml $.Values.commons.helm.release.upgrade | nindent 4 }} + interval: 5m + valuesFrom: + - kind: Secret + name: "global-alertmanager-matrix-token" + optional: false + valuesKey: token + targetPath: bot.matrix.token + values: + replicaCount: 1 + + bot: + matrix: + homeserver: {{ .homeserver | quote }} + userID: {{ .userID | quote }} + rooms: + - {{ .default | quote }} + {{- range $item := .rooms }} + - {{ $item.room | quote }} + {{- end }} + alertmanager: "https://{{ $.Values.alertmanager.ingress.host | default (printf "alertmanager.%s" $.Values.commons.ingress.domain) }}" + showLabels: true + + serviceAccount: + create: false + + 
securityContext: + runAsUser: 65534 + runAsGroup: 65534 + + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "256Mi" + + tolerations: + {{- toYaml $.Values.commons.tolerations | nindent 6 }} + affinity: + {{- toYaml $.Values.commons.affinity | nindent 6 }} +{{- end }} +{{- end }} diff --git a/infra-monitoring/templates/alertmanager/matrix/secret.yaml b/infra-monitoring/templates/alertmanager/matrix/secret.yaml new file mode 100644 index 0000000..e9bb441 --- /dev/null +++ b/infra-monitoring/templates/alertmanager/matrix/secret.yaml @@ -0,0 +1,11 @@ +{{- with .Values.alertmanager.receiver.matrix }} +{{- if .enabled }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: global-alertmanager-matrix-token +data: + token: {{ .token | b64enc }} +{{- end }} +{{- end }} diff --git a/infra-monitoring/templates/alertmanager/ntfy/release.yaml b/infra-monitoring/templates/alertmanager/ntfy/release.yaml new file mode 100644 index 0000000..7adece8 --- /dev/null +++ b/infra-monitoring/templates/alertmanager/ntfy/release.yaml @@ -0,0 +1,74 @@ +{{- with .Values.alertmanager.receiver.ntfy }} +{{- if .enabled }} +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: "alertmanager-ntfy" +spec: + chart: + spec: + sourceRef: + kind: "GitRepository" + name: "wrenix-helm-charts" + namespace: "flux-system" + chart: "./alertmanager-ntfy" + reconcileStrategy: "Revision" + install: + {{- toYaml $.Values.commons.helm.release.install | nindent 4 }} + test: + {{- toYaml $.Values.commons.helm.release.test | nindent 4 }} + upgrade: + {{- toYaml $.Values.commons.helm.release.upgrade | nindent 4 }} + interval: 5m + valuesFrom: + - kind: Secret + name: "global-alertmanager-ntfy-auth" + optional: true + valuesKey: username + targetPath: ntfyAlertmanager.user + - kind: Secret + name: "global-alertmanager-ntfy-auth" + optional: true + valuesKey: password + targetPath: ntfyAlertmanager.password + values: + ntfyAlertmanager: + ntfy: + # for topic, 
username, password + {{- toYaml .config.ntfy | nindent 8 }} + labels: + {{- toYaml .config.labels | nindent 8 }} + + {{- with .ingress }} + {{- if .enabled }} + {{- $host := .host | default (printf "alert2ntfy.%s" $.Values.commons.ingress.domain) }} + ingress: + enabled: true + annotations: + {{- with $.Values.commons.ingress.annotations }} + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .annotations }} + {{- toYaml . | nindent 6 }} + {{- end }} + hosts: + - host: "{{ $host }}" + paths: + - path: "/" + pathType: Prefix + {{- if $.Values.commons.ingress.tls.enabled }} + tls: + {{- with $.Values.commons.ingress.tls.override }} + {{- toYaml . | nindent 6 }} + {{- else }} + - secretName: "infra-monitoring-alert-ntfy-cert" + hosts: + - "{{ $host }}" + {{- end }} + {{- end }} + {{- end }} + {{- end }} + +{{- end }} +{{- end }} diff --git a/infra-monitoring/templates/alertmanager/ntfy/secret.yaml b/infra-monitoring/templates/alertmanager/ntfy/secret.yaml new file mode 100644 index 0000000..3a6c041 --- /dev/null +++ b/infra-monitoring/templates/alertmanager/ntfy/secret.yaml @@ -0,0 +1,12 @@ +{{- with .Values.alertmanager.receiver.ntfy }} +{{- if and .enabled (.config.user) }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: global-alertmanager-ntfy-auth +data: + username: {{ .config.user| b64enc }} + password: {{ .config.password | default (derivePassword 1 "long" $.Values.commons.masterPassword "alertmanger" "ntfyPassword") | b64enc }} +{{- end }} +{{- end }} diff --git a/infra-monitoring/templates/configmap_init_crd.yaml b/infra-monitoring/templates/configmap_init_crd.yaml new file mode 100644 index 0000000..16a065c --- /dev/null +++ b/infra-monitoring/templates/configmap_init_crd.yaml @@ -0,0 +1,15 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-init + namespace: "{{ .Values.init.namespace }}" +data: + {{- if and + (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") + }} + init: "-1" + {{- else }} + init: 
"{{ add1 .Values.init.version }}" + {{- end }} + diff --git a/infra-monitoring/templates/exporter/blackbox/release.yaml b/infra-monitoring/templates/exporter/blackbox/release.yaml new file mode 100644 index 0000000..ff53134 --- /dev/null +++ b/infra-monitoring/templates/exporter/blackbox/release.yaml @@ -0,0 +1,183 @@ +{{- if .Values.prometheus.exporter.blackbox.enabled }} +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: prometheus-blackbox-exporter +spec: + chart: + spec: + sourceRef: + kind: HelmRepository + name: "prometheus-community" + chart: "prometheus-blackbox-exporter" + interval: 10m + install: + {{- toYaml .Values.commons.helm.release.install | nindent 4 }} + test: + {{- toYaml .Values.commons.helm.release.test | nindent 4 }} + upgrade: + {{- toYaml .Values.commons.helm.release.upgrade | nindent 4 }} + interval: 10m + values: + + # to run icmp + # Workaround: https://github.com/prometheus-community/helm-charts/issues/2360 + podSecurityContext: + sysctls: + - name: net.ipv4.ping_group_range + value: "0 65536" + # securityContext: + # capabilities: + # add: ["NET_RAW"] + + serviceMonitor: + selfMonitor: + enabled: {{ (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }} + labels: + {{- toYaml .Values.commons.prometheus.monitor.labels | nindent 10 }} + + + {{- with .Values.prometheus.exporter.blackbox.ingress }} + {{- if .enabled }} + {{- $host := .host | default (printf "blackbox.exporter.%s" $.Values.commons.ingress.domain)}} + ingress: + enabled: true + annotations: + {{- with $.Values.commons.ingress.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + hosts: + - host: "{{ $host }}" + paths: + - path: "/" + pathType: Prefix + {{- if $.Values.commons.ingress.tls.enabled }} + tls: + {{- with $.Values.commons.ingress.tls.override }} + {{- toYaml . 
| nindent 8 }} + {{- else }} + - secretName: "infra-monitoring-exporter-blackbox-cert" + hosts: + - "{{ $host }}" + {{- end }} + {{- end }} + {{- end }} + {{- end }} + + + config: + modules: + # HTTP IPv4 and IPv6 only + http_2xx: + prober: http + + http_2xx_tls_skip_verify: + prober: http + http: + tls_config: + insecure_skip_verify: true + + http_2xx_ipv4: + prober: http + http: + ip_protocol_fallback: false + preferred_ip_protocol: "ip4" + http_2xx_ipv6: + prober: http + http: + ip_protocol_fallback: false + preferred_ip_protocol: "ip6" + + http_post_2xx: + prober: http + http: + method: POST + + # TCP + tcp_connect: + prober: tcp + tcp_connect_ipv4: + prober: tcp + tcp: + ip_protocol_fallback: false + preferred_ip_protocol: "ip4" + tcp_connect_ipv6: + prober: tcp + tcp: + ip_protocol_fallback: false + preferred_ip_protocol: "ip6" + + # TLS + tls_connect: + prober: tcp + tcp: + tls: true + tls_config: + insecure_skip_verify: false + tls_connect_tls_skip_verify: + prober: tcp + tcp: + tls: true + tls_config: + insecure_skip_verify: true + tls_connect_ipv4: + prober: tcp + tcp: + tls: true + tls_config: + insecure_skip_verify: false + ip_protocol_fallback: false + preferred_ip_protocol: "ip4" + tls_connect_ipv6: + prober: tcp + tcp: + tls: true + tls_config: + insecure_skip_verify: false + ip_protocol_fallback: false + preferred_ip_protocol: "ip6" + + pop3s_banner: + prober: tcp + tcp: + query_response: + - expect: "^+OK" + tls: true + tls_config: + insecure_skip_verify: false + grpc: + prober: grpc + grpc: + tls: true + grpc_plain: + prober: grpc + grpc: + tls: false + service: "service1" + ssh_banner: + prober: tcp + tcp: + query_response: + - expect: "^SSH-2.0-" + - send: "SSH-2.0-blackbox-ssh-check" + + # Ping IPv4 and IPv6 only + icmp: + prober: icmp + icmp: + preferred_ip_protocol: "ip4" + icmp_ipv4: + prober: icmp + icmp: + ip_protocol_fallback: false + preferred_ip_protocol: "ip4" + icmp_ipv6: + prober: icmp + icmp: + ip_protocol_fallback: false + 
preferred_ip_protocol: "ip6" +{{- end }} diff --git a/infra-monitoring/templates/grafana/authentik-application.yaml b/infra-monitoring/templates/grafana/authentik-application.yaml new file mode 100644 index 0000000..49e766f --- /dev/null +++ b/infra-monitoring/templates/grafana/authentik-application.yaml @@ -0,0 +1,70 @@ +{{- if and + .Values.commons.auth.enabled (eq .Values.commons.auth.type "authentik") + .Values.grafana.ingress.enabled +}} +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: authentik-application-grafana +spec: + chart: + spec: + sourceRef: + kind: GitRepository + name: "wrenix-helm-charts" + namespace: "flux-system" + chart: "./authentik-application" + reconcileStrategy: "Revision" + interval: 10m + releaseName: authentik-application-infra-grafana + targetNamespace: {{ .Values.commons.auth.namespace }} + install: + {{- toYaml .Values.commons.helm.release.install | nindent 4 }} + test: + {{- toYaml .Values.commons.helm.release.test | nindent 4 }} + upgrade: + {{- toYaml .Values.commons.helm.release.upgrade | nindent 4 }} + interval: 10m + values: + {{- $url := default (printf "grafana.%s" .Values.commons.ingress.domain) .Values.grafana.ingress.host }} + blueprint: + authentik: + domain: "https://{{ .Values.commons.auth.authentik.domain }}" + provider: + type: "oidc" + name: "Grafana" + oidc: + clientType: "confidential" + redirectURL: "https://{{ $url }}/login/generic_oauth" + clientID: {{ .Values.grafana.auth.authentik.clientID | default (derivePassword 1 "long" .Values.commons.masterPassword "grafana" "clientID") | quote }} + clientSecret: {{ .Values.grafana.auth.authentik.clientSecret | default (derivePassword 1 "long" .Values.commons.masterPassword "grafana" "clientSecret") | quote }} + # signingKey: + scopes: + - openid + - email + - profile + + groups: + - slug: "app: infra" + bindID: "8c750219-36cd-47f4-8942-134f3dada96e" + + - slug: "app: grafana - admin" + bindID: "d2bebd53-24b8-48d8-a479-d253971dc453" 
+ parent: "app: infra" + + - slug: "app: grafana - editor" + bindID: "cff2982b-4b47-4e27-8d47-e6fec3e7cfac" + parent: "app: infra" + + application: + group: "Infrastructure" + policyEngineMode: "any" + openInNewTab: true + publisher: "WrenIX's Infra" + slug: "infra-grafana" + name: "Grafana" + launchURL: "https://{{ $url }}" + icon: "https://{{ $url }}/public/img/grafana_icon.svg" + description: "Grafana is a multi-platform open source analytics and interactive visualization web application. It provides charts, graphs, and alerts for the web when connected to supported data sources." +{{- end }} diff --git a/infra-monitoring/templates/karma/authentik-application.yaml b/infra-monitoring/templates/karma/authentik-application.yaml new file mode 100644 index 0000000..ea888ec --- /dev/null +++ b/infra-monitoring/templates/karma/authentik-application.yaml @@ -0,0 +1,60 @@ +{{- if and + .Values.commons.auth.enabled (eq .Values.commons.auth.type "authentik") + .Values.karma.enabled +}} +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: authentik-application-karma +spec: + chart: + spec: + sourceRef: + kind: GitRepository + name: "wrenix-helm-charts" + namespace: "flux-system" + chart: "./authentik-application" + reconcileStrategy: "Revision" + interval: 10m + releaseName: authentik-application-infra-karma + targetNamespace: {{ .Values.commons.auth.namespace }} + install: + {{- toYaml .Values.commons.helm.release.install | nindent 4 }} + test: + {{- toYaml .Values.commons.helm.release.test | nindent 4 }} + upgrade: + {{- toYaml .Values.commons.helm.release.upgrade | nindent 4 }} + interval: 10m + values: + {{- $domain := .Values.karma.ingress.host | default (printf "karma.%s" .Values.commons.ingress.domain) }} + {{- $url := printf "https://%s" $domain }} + blueprint: + authentik: + domain: "https://{{ .Values.commons.auth.authentik.domain }}" + provider: + enabled: {{ not .Values.karma.auth.anonymous.enabled }} + type: "proxy" + proxy: 
externalHost: {{ $url | quote }} + skipPathRegex: | + /favicon.ico + ingress: + enabled: true + domain: {{ $domain | quote }} + + groups: + - slug: "app: infra" + bindID: "e4bda29f-0059-4590-9451-bc4ddd24b666" + + application: + group: "Infrastructure" + policyEngineMode: "any" + openInNewTab: true + publisher: "WrenIX's Infra" + slug: "infra-karma" + name: "Karma" + launchURL: {{ $url | quote }} + icon: "{{ $url }}/favicon.ico" + description: "Alertmanager UI is useful for browsing alerts and managing silences, but it’s lacking as a dashboard tool - karma aims to fill this gap." +{{- end }} diff --git a/infra-monitoring/templates/karma/release.yaml b/infra-monitoring/templates/karma/release.yaml new file mode 100644 index 0000000..72d3d93 --- /dev/null +++ b/infra-monitoring/templates/karma/release.yaml @@ -0,0 +1,138 @@ +{{- if .Values.karma.enabled }} +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: karma +spec: + chart: + spec: + sourceRef: + kind: HelmRepository + name: "wiremind" + chart: "karma" + interval: 10m + install: + {{- toYaml .Values.commons.helm.release.install | nindent 4 }} + test: + {{- toYaml .Values.commons.helm.release.test | nindent 4 }} + upgrade: + {{- toYaml .Values.commons.helm.release.upgrade | nindent 4 }} + interval: 10m + values: + configMap: + enabled: true + rawConfig: + karma: + name: "Alerts" + {{- with .Values.commons.auth }} + {{- if and .enabled (eq .type "authentik") (not $.Values.karma.auth.anonymous.enabled) }} + authentication: + header: + name: "X-authentik-username" + value_re: "^(.+)$" + group_name: "X-authentik-groups" + group_value_re: "^(.+)$" + group_value_separator: "|" + {{- end }} + {{- end }} + ui: + refresh: 10s + multiGridLabel: severity + grid: + sorting: + order: label + reverse: false + customValues: + labels: + severity: + critical: 1 + warning: 2 + info: 3 + auto: + order: + - severity + labels: + valueOnly: + - alertname + - severity + strip: + - active + - load + - 
sub + - state + - fstype + - prometheus + - "@receiver" + - job + - service + - container + - endpoint + color: + unique: + - namespace + - "@cluster" + custom: + severity: + - value: info + color: "#87c4e0" + - value: warning + color: "#ffae42" + - value: critical + color: "#ff220c" + filters: + default: + - "@state=active" + + history: + enabled: true + timeout: 5s + workers: 5 + + alertmanager: + servers: + - name: "default" + uri: "http://alertmanager-operated:9093" + proxy: true + healthcheck: + filters: + watchdog: + - "alertname=Watchdog" + - "severity=none" + {{- with .Values.karma.additionalAlertmanager }} + {{- toYaml . | nindent 12 }} + {{- end }} + + {{- with .Values.karma.ingress }} + {{- if .enabled }} + {{- $host := .host | default (printf "karma.%s" $.Values.commons.ingress.domain) }} + ingress: + enabled: true + annotations: + {{- with $.Values.commons.ingress.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $.Values.commons.auth }} + {{- if and .enabled (eq .type "authentik") (not $.Values.karma.auth.anonymous.enabled) }} + "traefik.ingress.kubernetes.io/router.middlewares": "{{ .namespace }}-authentik-outpost@kubernetescrd" + {{- end }} + {{- end }} + {{- with .annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + hosts: + - "{{ $host }}" + path: "/" + {{- if $.Values.commons.ingress.tls.enabled }} + tls: + {{- with $.Values.commons.ingress.tls.override }} + {{- toYaml . 
| nindent 8 }} + {{- else }} + - secretName: "infra-monitoring-karma-cert" + hosts: + - "{{ $host }}" + {{- end }} + {{- end }} + {{- end }} + {{- end }}{{/* end-with karma.ingress */}} +{{- end }} diff --git a/infra-monitoring/templates/karma/repo.yaml b/infra-monitoring/templates/karma/repo.yaml new file mode 100644 index 0000000..d408c6e --- /dev/null +++ b/infra-monitoring/templates/karma/repo.yaml @@ -0,0 +1,8 @@ +--- +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: wiremind +spec: + url: https://wiremind.github.io/wiremind-helm-charts + interval: 10m0s diff --git a/infra-monitoring/templates/kube-prometheus-stack/release.yaml b/infra-monitoring/templates/kube-prometheus-stack/release.yaml new file mode 100644 index 0000000..b5fc7db --- /dev/null +++ b/infra-monitoring/templates/kube-prometheus-stack/release.yaml @@ -0,0 +1,222 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: kube-prometheus-stack +spec: + chart: + spec: + sourceRef: + kind: HelmRepository + name: "prometheus-community" + chart: "kube-prometheus-stack" + interval: 10m + install: + {{- toYaml .Values.commons.helm.release.install | nindent 4 }} + test: + {{- toYaml .Values.commons.helm.release.test | nindent 4 }} + upgrade: + {{- toYaml .Values.commons.helm.release.upgrade | nindent 4 }} + interval: 10m + values: + commonLabels: + prometheus: "default" + + defaultRules: + rules: + kubeProxy: false + + prometheus: + {{- $hostPrometheus := .Values.prometheus.ingress.host | default (printf "prometheus.%s" .Values.commons.ingress.domain) }} + prometheusSpec: + {{- if .Values.prometheus.ingress.enabled }} + externalUrl: "https://{{ $hostPrometheus }}" + {{- end }} + podMonitorSelector: + matchLabels: + prometheus: "default" + probeMonitorSelector: + matchLabels: + prometheus: "default" + probeSelector: + matchLabels: + prometheus: "default" + ruleSelector: + matchLabels: + prometheus: "default" + serviceMonitorSelector: + 
+ matchLabels: + prometheus: "default" + scrapeConfigSelector: + matchLabels: + prometheus: "default" + storageSpec: + volumeClaimTemplate: + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 5Gi + {{- with .Values.prometheus.ingress }} + {{- if .enabled }} + ingress: + enabled: true + annotations: + {{- with $.Values.commons.ingress.annotations }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .annotations }} + {{- toYaml . | nindent 10 }} + {{- end }} + hosts: + - "{{ $hostPrometheus }}" + paths: [ "/" ] # list key, same as the alertmanager ingress below + {{- if $.Values.commons.ingress.tls.enabled }} + tls: + {{- with $.Values.commons.ingress.tls.override }} + {{- toYaml . | nindent 10 }} + {{- else }} + - secretName: "infra-monitoring-prometheus-cert" + hosts: + - "{{ $hostPrometheus }}" + {{- end }} + {{- end }} + {{- end }} + {{- end }}{{/* end-with prometheus.ingress */}} + + alertmanager: + {{- $hostAlertmanager := .Values.alertmanager.ingress.host | default (printf "alertmanager.%s" $.Values.commons.ingress.domain) }} + alertmanagerSpec: + {{- if .Values.alertmanager.ingress.enabled }} + externalUrl: "https://{{ $hostAlertmanager }}" + {{- end }} + replicas: 1 + alertmanagerConfiguration: + name: "global-alertmanager-conf" + alertmanagerConfigSelector: + matchLabels: + alertmanager: "default" + + {{- with .Values.alertmanager.ingress }} + {{- if .enabled }} + ingress: + enabled: true + annotations: + {{- with $.Values.commons.ingress.annotations }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .annotations }} + {{- toYaml . | nindent 10 }} + {{- end }} + hosts: + - "{{ $hostAlertmanager }}" + paths: [ "/" ] + {{- if $.Values.commons.ingress.tls.enabled }} + tls: + {{- with $.Values.commons.ingress.tls.override }} + {{- toYaml . 
| nindent 10 }} + {{- else }} + - secretName: "infra-monitoring-alertmanager-cert" + hosts: + - "{{ $hostAlertmanager }}" + {{- end }} + {{- end }} + {{- end }} + {{- end }}{{/* end-with alertmanager.ingress */}} + + grafana: + adminPassword: {{ .Values.grafana.adminPassword | default (derivePassword 1 "long" .Values.commons.masterPassword "grafana" "adminPassword") | quote }} + grafana.ini: + server: + # bug in grafana-helmchart needed for oauth redirect url (without port :3000) + root_url: "https://%(domain)s/" + auth: + {{- if and .Values.commons.auth.enabled (eq .Values.commons.auth.type "authentik") }} + signout_redirect_url: https://{{ .Values.commons.auth.authentik.domain }}/application/o/grafana/end-session/ + oauth_auto_login: true + {{- else }} + disable_login_form: true + disable_signout_menu: true + {{- end }} + + {{- if .Values.grafana.auth.anonymous.enabled }} + auth.anonymous: + enabled: true + {{- end }} + + {{- if and .Values.commons.auth.enabled (eq .Values.commons.auth.type "authentik") }} + auth.generic_oauth: + name: authentik + enabled: true + client_id: {{ .Values.grafana.auth.authentik.clientID | default (derivePassword 1 "long" .Values.commons.masterPassword "grafana" "clientID") | quote }} + client_secret: {{ .Values.grafana.auth.authentik.clientSecret | default (derivePassword 1 "long" .Values.commons.masterPassword "grafana" "clientSecret") | quote }} + scopes: openid email profile + auth_url: https://{{ .Values.commons.auth.authentik.domain }}/application/o/authorize/ + token_url: https://{{ .Values.commons.auth.authentik.domain }}/application/o/token/ + api_url: https://{{ .Values.commons.auth.authentik.domain }}/application/o/userinfo/ + role_attribute_path: "contains(groups[*], 'app: grafana - admin') && 'Admin' || contains(groups[*], 'app: grafana - editor') && 'Editor' || 'Viewer'" + {{- end }} + + testFramework: + enabled: false + sidecar: + dashboards: + searchNamespace: ALL + + ## set option to grafana (and sidecar) to create 
folder in grafana based on annotations + folderAnnotation: {{ .Values.grafana.dashboards.folderAnnotation | quote }} + provider: + foldersFromFilesStructure: true + + ## put all default-dashboards to folder Kubernetes + {{- with .Values.grafana.dashboards.annotations }} + annotations: + {{- toYaml . | nindent 12 }} + {{- end }} + datasources: + searchNamespace: ALL + {{- with .Values.grafana.ingress }} + {{- if .enabled }} + {{- $hostGrafana := .host | default (printf "grafana.%s" $.Values.commons.ingress.domain) }} + ingress: + enabled: true + annotations: + {{- with $.Values.commons.ingress.annotations }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .annotations }} + {{- toYaml . | nindent 10 }} + {{- end }} + hosts: + - "{{ $hostGrafana }}" + path: "/" + {{- if $.Values.commons.ingress.tls.enabled }} + tls: + {{- with $.Values.commons.ingress.tls.override }} + {{- toYaml . | nindent 10 }} + {{- else }} + - secretName: "infra-monitoring-grafana-cert" + hosts: + - "{{ $hostGrafana }}" + {{- end }} + {{- end }} + {{- end }} + {{- end }}{{/* end-with grafana.ingress */}} + + kube-state-metrics: + prometheus: + monitor: + additionalLabels: + prometheus: "default" + + prometheus-node-exporter: + prometheus: + monitor: + additionalLabels: + prometheus: "default" + + kubeScheduler: + enabled: false + + kubeControllerManager: + enabled: false diff --git a/infra-monitoring/templates/prometheus/authentik-application.yaml b/infra-monitoring/templates/prometheus/authentik-application.yaml new file mode 100644 index 0000000..aea70f6 --- /dev/null +++ b/infra-monitoring/templates/prometheus/authentik-application.yaml @@ -0,0 +1,59 @@ +{{- if and + .Values.commons.auth.enabled (eq .Values.commons.auth.type "authentik") + .Values.prometheus.ingress.enabled +}} +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: authentik-application-prometheus +spec: + chart: + spec: + sourceRef: + kind: GitRepository + name: "wrenix-helm-charts" + namespace:
"flux-system" + chart: "./authentik-application" + reconcileStrategy: "Revision" + interval: 10m + releaseName: authentik-application-infra-prometheus + targetNamespace: {{ .Values.commons.auth.namespace }} + install: + {{- toYaml .Values.commons.helm.release.install | nindent 4 }} + test: + {{- toYaml .Values.commons.helm.release.test | nindent 4 }} + upgrade: + {{- toYaml .Values.commons.helm.release.upgrade | nindent 4 }} + interval: 10m + values: + {{- $domain := .Values.prometheus.ingress.host | default (printf "prometheus.%s" .Values.commons.ingress.domain) }} + {{- $url := printf "https://%s" $domain }} + blueprint: + authentik: + domain: "https://{{ .Values.commons.auth.authentik.domain }}" + provider: + enabled: {{ not .Values.prometheus.auth.anonymous.enabled }} + type: "proxy" + proxy: + externalHost: {{ $url | quote }} + skipPathRegex: | + /favicon.ico + ingress: + enabled: true + domain: {{ $domain | quote }} + groups: + - slug: "app: infra" + bindID: "2b105d85-37f8-4552-b633-8434efeeebe7" + + application: + group: "Infrastructure" + policyEngineMode: "any" + openInNewTab: true + publisher: "WrenIX's Infra" + slug: "infra-prometheus" + name: "Prometheus" + launchURL: {{ $url | quote }} + icon: "{{ $url }}/favicon.ico" + description: "Prometheus is an open-source systems monitoring and alerting toolkit." 
+{{- end }} diff --git a/infra-monitoring/templates/repo.yaml b/infra-monitoring/templates/repo.yaml new file mode 100644 index 0000000..1c90ca8 --- /dev/null +++ b/infra-monitoring/templates/repo.yaml @@ -0,0 +1,8 @@ +--- +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: prometheus-community +spec: + url: https://prometheus-community.github.io/helm-charts/ + interval: 10m0s diff --git a/infra-monitoring/values.yaml b/infra-monitoring/values.yaml new file mode 100644 index 0000000..0cc1501 --- /dev/null +++ b/infra-monitoring/values.yaml @@ -0,0 +1,181 @@ +init: + version: 0 + namespace: "bases" + +commons: + masterPassword: "CHANGEME" + + auth: + enabled: false + type: "authentik" + namespace: "" + authentik: + domain: "" + + helm: + release: + install: {} + test: {} + upgrade: {} + + ingress: + domain: "wrenix.eu" + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + tls: + # -- tls on every ingress + enabled: false + # -- use own definition of tls (e.g. 
for own or wildcard certificate) + override: + + prometheus: + monitor: + labels: {} + +alertmanager: + ingress: + enabled: true + # use infra.commons.ingress.host with prefix + host: "" + # -- annotations on ingress object (beside of .Values.commons.ingress.annotations ) + annotations: {} + + auth: + anonymous: + enabled: false + + receiver: + # -- would overwrite defaults (like ntfy or matrix) + routes: + # # send every alert to ntfy + # # (and continue to routing for that alert) + # - receiver: "ntfy-default" + # continue: true + # # send selected alerts to special matrix room + # # (and remove them for evaluation - no continue) + # - receiver: "matrix-room-name" + # matchers: + # - name: "team" + # matchType: "=" + # value: "room-name" + # # all remaining alerts to matrix default room + # - receiver: "matrix-default" + # -- repeat Interval + repeatInterval: "24h" + matrix: + enabled: false + sendResolved: false + homeserver: "https://matrix.org" + userID: "@alert:matrix.org" + default: "!example-room:matrix.org" + # + rooms: {} + # room-name: + # sendResolved: false + # room: "!example-room-name:matrix.org" + # + ntfy: + enabled: true + sendResolved: false + ingress: + enabled: false + # use infra.commons.ingress.host with prefix + host: + # -- annotations on ingress object (beside of .Values.commons.ingress.annotations ) + annotations: {} + config: + # -- user used between alertmanager and ntfy receiver + user: "alertmanager-to-ntfy" + # -- password used between alertmanager and ntfy receiver generated from commons.masterPassword + password: + ntfy: + topic: "https://ntfy.wrenix.eu/alertmanager-example" + # user: + # password: + labels: + order: + - severity + entries: + - label: severity + value: critical + priority: 4 + tags: + - "rotating_light" + + - label: severity + value: warning + priority: 3 + tags: + - "warning" + + - label: severity + value: info + priority: 1 + tags: + - "information_source" + +prometheus: + ingress: + enabled: true + # use
infra.commons.ingress.host with prefix + host: "" + # -- annotations on ingress object (beside of .Values.commons.ingress.annotations ) + annotations: {} + # not yet supported again (replace or merge again - complex code) + # spec: {} + + auth: + anonymous: + enabled: false + + exporter: + blackbox: + enabled: true + ingress: + # -- enable ingress for blackbox-exporter + enabled: false + # -- default use .Values.commons.ingress.host with prefix: `blackbox.exporter` + host: "" + # -- annotations on ingress object (beside of .Values.commons.ingress.annotations ) + annotations: {} + +grafana: + # -- generated from commons.masterPassword + adminPassword: + + ingress: + enabled: true + # use infra.commons.ingress.host with prefix + host: "" + # -- annotations on ingress object (beside of .Values.commons.ingress.annotations ) + annotations: {} + + dashboards: + folderAnnotation: "grafana.mon.local/dashboard-folder" + annotations: + "grafana.mon.local/dashboard-folder": "Kubernetes" + + auth: + authentik: + # -- generated from commons.masterPassword + clientID: + # -- generated from commons.masterPassword + clientSecret: + anonymous: + enabled: true + +karma: + enabled: true + + ingress: + enabled: true + # use infra.commons.ingress.host with prefix + host: "" + # -- annotations on ingress object (beside of .Values.commons.ingress.annotations ) + annotations: {} + + auth: + anonymous: + enabled: false + + additionalAlertmanager: