fix(infra-logging): migrate from logging-operator to vector-agent
This commit is contained in:
parent
a87841325d
commit
b605fe6a3c
13 changed files with 106 additions and 408 deletions
53
infra-logging/config/vector.yaml
Normal file
53
infra-logging/config/vector.yaml
Normal file
|
@ -0,0 +1,53 @@
|
|||
# Vector configuration for the logging agent.
# This file is loaded with .Files.Get and rendered through Helm `tpl` before it
# is stored in the ConfigMap, so Helm template actions in here are evaluated.
# Vector's own field templates must therefore stay inside the raw-string block
# in the loki sink below.
data_dir: /vector-data-dir

# Local-only API endpoint; the playground is switched off.
api:
  enabled: true
  address: 127.0.0.1:8686
  playground: false

sources:
  kubernetes_logs:
    type: kubernetes_logs
  host_metrics:
    type: host_metrics
    # binfmt_misc is excluded from the filesystem collector at every level
    # (device, filesystem type and mountpoint).
    filesystem:
      devices:
        excludes: [binfmt_misc]
      filesystems:
        excludes: [binfmt_misc]
      mountpoints:
        excludes: ["*/proc/sys/fs/binfmt_misc"]
  internal_metrics:
    type: internal_metrics

transforms:
  logs:
    type: remap
    inputs:
      - kubernetes_logs
    # Provide fallbacks for the fields the loki sink uses as labels.
    # The namespace fallback is written to .kubernetes.pod_namespace because
    # that is the path the sink's "namespace" label reads; a fallback at the
    # event root would never reach the label.
    # NOTE(review): the fallback key "not found" contains a space - confirm
    # that it is accepted (or sanitized) as a Loki label name.
    source: |-
      if !exists(.kubernetes.pod_namespace) {
        .kubernetes.pod_namespace = "unknown"
      }
      if !exists(.metadata) {
        .metadata = {
          "not found": "unknown"
        }
      }

sinks:
  # Exposes host and internal metrics for Prometheus scraping.
  prom_exporter:
    type: prometheus_exporter
    inputs: [host_metrics, internal_metrics]
    address: 0.0.0.0:9090
# The loki sink is only rendered when the Helm value loki.enabled is true.
{{- if .Values.loki.enabled }}
  loki:
    type: loki
    inputs:
      - logs
    endpoint: http://loki:3100
    encoding:
      codec: logfmt
    labels:
{{`
      "pod_labels_*": "{{ kubernetes.pod_labels }}"
      "namespace": "{{ kubernetes.pod_namespace }}"
      "*": "{{ metadata }}"
`}}
{{- end }}
|
|
@ -6,9 +6,8 @@ metadata:
|
|||
namespace: "{{ .Values.init.namespace }}"
|
||||
data:
|
||||
{{- if and
|
||||
(.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor")
|
||||
(.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PodMonitor")
|
||||
(.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule")
|
||||
(.Capabilities.APIVersions.Has "logging.banzaicloud.io/v1beta1/Flow")
|
||||
}}
|
||||
init: "-1"
|
||||
{{- else }}
|
||||
|
|
|
@ -1,45 +0,0 @@
|
|||
{{- if (.Capabilities.APIVersions.Has "logging.banzaicloud.io/v1beta1/Flow") }}
|
||||
---
|
||||
apiVersion: logging.banzaicloud.io/v1beta1
|
||||
kind: Flow
|
||||
metadata:
|
||||
name: coredns
|
||||
namespace: kube-system
|
||||
spec:
|
||||
match:
|
||||
- select:
|
||||
labels:
|
||||
k8s-app: "coredns"
|
||||
filters:
|
||||
- tag_normaliser: {}
|
||||
- parser:
|
||||
reserve_data: true
|
||||
remove_key_name_field: true
|
||||
parse:
|
||||
type: "multi_format"
|
||||
patterns:
|
||||
- format: "regexp"
|
||||
expression: '^\[(?<log.level>.*)\] \[?(?<source.address>.*)\]?:(?<source.port>.*) - (?<dns.id>.*) "(?<dns.question.type>.*) (?<dns.question.class>.*) (?<dns.question.name>.*)\.? (?<network.transport>.*) (?<coredns.query.size>.*) (?<coredns.dnssec_ok>.*) (?<bufsize>.*)" (?<dns.response_code>.*) (?<dns.header_flags>.*) (?<coredns.response.size>.*) (?<coredns.duration>.*)s'
|
||||
types: "source.port:integer,dns.id:integer,coredns.query.size:integer,coredns.dnssec_ok:bool,bufsize:integer,dns.header_flags:array,coredns.response.size:integer,coredns.duration:float"
|
||||
- format: "none"
|
||||
- record_transformer:
|
||||
enable_ruby: true
|
||||
records:
|
||||
- source.ip: '${ record["source.address"] }'
|
||||
dns.header_flags: '${ !(record["dns.header_flags"].nil?) ? record["dns.header_flags"].map(&:upcase) : nil }'
|
||||
event.duration: '${ !(record["coredns.duration"].nil?) ? record["coredns.duration"] * 1000000000 : nil }'
|
||||
event.kind: "event"
|
||||
event.category: "network"
|
||||
event.type: "protocol"
|
||||
event.outcome: '${ record["dns.response_code"] == "NOERROR" ? "success" : "failure" }'
|
||||
event.protocol: "dns"
|
||||
event.module: "coredns"
|
||||
related.ip: '${ record["source.address"] }'
|
||||
# for dashboard
|
||||
fileset.name: "kubernetes"
|
||||
coredns.query.name: '${ record["dns.question.name"] }'
|
||||
remove_keys: "coredns.duration,coredns.dnssec_ok"
|
||||
globalOutputRefs:
|
||||
- "default"
|
||||
{{- end }}
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
{{- if (.Capabilities.APIVersions.Has "logging.banzaicloud.io/v1beta1/Flow") }}
|
||||
---
|
||||
apiVersion: logging.banzaicloud.io/v1beta1
|
||||
kind: Flow
|
||||
metadata:
|
||||
name: klog
|
||||
namespace: kube-system
|
||||
spec:
|
||||
match:
|
||||
- select:
|
||||
labels:
|
||||
k8s-app: "konnectivity-agent"
|
||||
- select:
|
||||
labels:
|
||||
k8s-app: "kube-proxy"
|
||||
- select:
|
||||
labels:
|
||||
app: "snapshot-validation-webhook"
|
||||
filters:
|
||||
- tag_normaliser: {}
|
||||
- parser:
|
||||
hash_value_field: "klog"
|
||||
reserve_data: true
|
||||
remove_key_name_field: true
|
||||
parse:
|
||||
type: "multi_format"
|
||||
patterns:
|
||||
- format: "regexp"
|
||||
expression: '(?<log_level>[A-Z])(?<month>\d{2})(?<day>\d{2})\s+(?<time>\d{2}:\d{2}:\d{2}(|\.\d+))\s+(?<threadid>\d+)\s+(?<file>[^ ]*):(?<line>\d+)\]\s("(?<msg>([^"\\]*(?:\\.[^"\\]*)*))"(|\s+(?<kv>.*))|(?<greedy_msg>.*))$'
|
||||
types: "month:integer,day:integer,threadid:integer"
|
||||
- format: "none"
|
||||
- record_transformer:
|
||||
enable_ruby: true
|
||||
records:
|
||||
- timestamp: '${time.strftime("%Y")}-${ record["klog"]["month"] }-${ record["klog"]["day"] }T${ record["klog"]["time"] }Z'
|
||||
message: '${ !(record["klog"]["greedy_msg"].nil?) ? record["klog"]["greedy_msg"] : record["klog"]["msg"] }'
|
||||
log.level: '${ record["klog"]["log_level"].gsub("I", "info").gsub("W", "warn").gsub("E", "error").gsub("F", "fatal") }'
|
||||
klog_kv: '${ !(record["klog"]["kv"].nil?) ? record["klog"]["kv"] : "" }'
|
||||
remove_keys: "$['klog']['month'],$['klog']['day'],$['klog']['time'],$['klog']['log_level'],$['klog']['msg'],$['klog']['greedy_msg'],$['klog']['kv']"
|
||||
- parser:
|
||||
key_name: "klog_kv"
|
||||
hash_value_field: "klog.fields"
|
||||
reserve_data: true
|
||||
remove_key_name_field: true
|
||||
parse:
|
||||
type: "multi_format"
|
||||
patterns:
|
||||
- format: "logfmt"
|
||||
- format: "none"
|
||||
globalOutputRefs:
|
||||
- "default"
|
||||
{{- end }}
|
|
@ -1,29 +0,0 @@
|
|||
{{- if (.Capabilities.APIVersions.Has "logging.banzaicloud.io/v1beta1/Flow") }}
|
||||
---
|
||||
apiVersion: logging.banzaicloud.io/v1beta1
|
||||
kind: Flow
|
||||
metadata:
|
||||
name: event-tailer
|
||||
spec:
|
||||
match:
|
||||
- select:
|
||||
labels:
|
||||
"app.kubernetes.io/name": "event-tailer"
|
||||
filters:
|
||||
- tag_normaliser: {}
|
||||
- parser:
|
||||
hash_value_field: "kubernetes"
|
||||
remove_key_name_field: true
|
||||
reserve_data: true
|
||||
parse:
|
||||
type: "json"
|
||||
- record_transformer:
|
||||
enable_ruby: true
|
||||
records:
|
||||
- event.module: "kubernetes"
|
||||
message: '${ record["kubernetes"]["event"]["message"] }'
|
||||
remove_keys: "$['kubernetes']['event']['message']"
|
||||
globalOutputRefs:
|
||||
- "default"
|
||||
{{- end }}
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
{{- if (.Capabilities.APIVersions.Has "logging.banzaicloud.io/v1beta1/Flow") }}
|
||||
---
|
||||
apiVersion: logging.banzaicloud.io/v1beta1
|
||||
kind: Flow
|
||||
metadata:
|
||||
name: fluentbit
|
||||
spec:
|
||||
match:
|
||||
- select:
|
||||
labels:
|
||||
"app.kubernetes.io/name": "fluentbit"
|
||||
filters:
|
||||
- tag_normaliser: {}
|
||||
- parser:
|
||||
hash_value_field: "fluentbit"
|
||||
reserve_data: true
|
||||
remove_key_name_field: true
|
||||
parse:
|
||||
type: "regexp"
|
||||
expression: '^\[(?<timestamp>.*)\] \[(?<log.level>.*)\] \[(?<component>.*)\] (?<message>.*)'
|
||||
types: "timestamp:string,log.level:string,component:string,message:string"
|
||||
time_key: "timestamp"
|
||||
time_type: "string"
|
||||
time_format: "%Y/%m/%d %H:%M:%S"
|
||||
- record_transformer:
|
||||
enable_ruby: true
|
||||
records:
|
||||
- event.kind: "event"
|
||||
event.module: "fluentbit"
|
||||
message: '${record["fluentbit"]["message"]}'
|
||||
log.level: '${record["fluentbit"]["log.level"]}'
|
||||
remove_keys: "$['fluentbit']['log']['level'],$['fluentbit']['message']"
|
||||
globalOutputRefs:
|
||||
- default
|
||||
{{- end }}
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
{{- if (.Capabilities.APIVersions.Has "logging.banzaicloud.io/v1beta1/Flow") }}
|
||||
---
|
||||
apiVersion: logging.banzaicloud.io/v1beta1
|
||||
kind: Flow
|
||||
metadata:
|
||||
name: logging-operator
|
||||
spec:
|
||||
match:
|
||||
- select:
|
||||
labels:
|
||||
"app.kubernetes.io/name": "logging-operator"
|
||||
filters:
|
||||
- tag_normaliser: {}
|
||||
- parser:
|
||||
hash_value_field: "logging-operator"
|
||||
reserve_data: true
|
||||
remove_key_name_field: true
|
||||
parse:
|
||||
type: "json"
|
||||
time_key: "ts"
|
||||
time_type: "string"
|
||||
time_format: "%iso8601"
|
||||
- record_transformer:
|
||||
enable_ruby: true
|
||||
records:
|
||||
- event.kind: "event"
|
||||
event.module: "logging-operator"
|
||||
message: '${record["logging-operator"]["msg"]}'
|
||||
log.level: '${record["logging-operator"]["level"]}'
|
||||
remove_keys: "$['logging-operator']['level'],$['logging-operator']['msg']"
|
||||
globalOutputRefs:
|
||||
- default
|
||||
{{- end }}
|
||||
|
|
@ -1,202 +0,0 @@
|
|||
---
|
||||
apiVersion: helm.toolkit.fluxcd.io/v2
|
||||
kind: HelmRelease
|
||||
metadata:
|
||||
name: logging-operator
|
||||
spec:
|
||||
chart:
|
||||
spec:
|
||||
sourceRef:
|
||||
kind: HelmRepository
|
||||
name: kube-logging
|
||||
chart: logging-operator
|
||||
install:
|
||||
{{- toYaml .Values.commons.helm.release.install | nindent 4 }}
|
||||
test:
|
||||
{{- toYaml .Values.commons.helm.release.test | nindent 4 }}
|
||||
upgrade:
|
||||
{{- toYaml .Values.commons.helm.release.upgrade | nindent 4 }}
|
||||
driftDetection:
|
||||
{{- toYaml .Values.commons.helm.release.driftDetection | nindent 4 }}
|
||||
interval: 10m
|
||||
values:
|
||||
monitoring:
|
||||
serviceMonitor:
|
||||
enabled: {{ (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
|
||||
additionalLabels:
|
||||
{{- toYaml .Values.commons.prometheus.monitor.labels | nindent 10 }}
|
||||
|
||||
# resources for logging-operator
|
||||
resources:
|
||||
limits:
|
||||
memory: 3Gi
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
|
||||
logging:
|
||||
enabled: true
|
||||
# fluentbit is used to collect data on nodes (so it is useful to use hostPath)
|
||||
fluentbit:
|
||||
bufferStorageVolume:
|
||||
hostPath:
|
||||
path: "/var/lib/kube-logging/fluentbit/buffer"
|
||||
positiondb:
|
||||
hostPath:
|
||||
path: "/var/lib/kube-logging/fluentbit/positiondb"
|
||||
metrics:
|
||||
prometheusRules: {{ (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") }}
|
||||
serviceMonitor: {{ (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
|
||||
serviceMonitorConfig:
|
||||
additionalLabels:
|
||||
{{- toYaml .Values.commons.prometheus.monitor.labels | nindent 14 }}
|
||||
|
||||
|
||||
# fluentd is used to receive data from fluentbit, filter (e.g. parse, grep) and forward output (e.g. loki)
|
||||
fluentd:
|
||||
scaling:
|
||||
replicas: {{ .Values.fluentd.replicas }}
|
||||
# resources for fluentd
|
||||
resources:
|
||||
limits:
|
||||
memory: "2400M"
|
||||
requests:
|
||||
cpu: "500m"
|
||||
memory: "200M"
|
||||
metrics:
|
||||
prometheusRules: {{ (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") }}
|
||||
serviceMonitor: {{ (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
|
||||
serviceMonitorConfig:
|
||||
additionalLabels:
|
||||
{{- toYaml .Values.commons.prometheus.monitor.labels | nindent 14 }}
|
||||
bufferVolumeMetrics:
|
||||
prometheusRules: {{ (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") }}
|
||||
serviceMonitor: {{ (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
|
||||
serviceMonitorConfig:
|
||||
additionalLabels:
|
||||
{{- toYaml .Values.commons.prometheus.monitor.labels | nindent 14 }}
|
||||
|
||||
tls:
|
||||
# make problems on reinstall (maybe try it sometime again)
|
||||
enabled: false
|
||||
|
||||
# allow clusteroutput from flow in other namespace
|
||||
allowClusterResourcesFromAllNamespaces: true
|
||||
|
||||
enableRecreateWorkloadOnImmutableFieldChange: true
|
||||
|
||||
# log kubernetes events
|
||||
eventTailer:
|
||||
name: "default"
|
||||
|
||||
# forward errors to output
|
||||
errorOutputRef: "default"
|
||||
|
||||
# if no (cluster)flow exists for pods:
|
||||
# filter: drop log messages if they contain "debug"
|
||||
# send logs: clusterOutput "default"
|
||||
defaultFlow:
|
||||
filters:
|
||||
- grep:
|
||||
exclude:
|
||||
- key: "message"
|
||||
pattern: /.*[Dd]ebug.*/
|
||||
- prometheus:
|
||||
metrics:
|
||||
- name: "logs_defaultflow_count"
|
||||
desc: "The total number of message in namespace"
|
||||
type: "counter"
|
||||
labels:
|
||||
exported_namespace: "$.kubernetes.namespace_name"
|
||||
exported_pod: "$.kubernetes.pod_name"
|
||||
exported_container: "$.kubernetes.container_name"
|
||||
image: "$.kubernetes.container_image"
|
||||
app_kubernetes_io_name: "$['kubernetes']['labels']['app.kubernetes.io/name']"
|
||||
app_kubernetes_io_instance: "$['kubernetes']['labels']['app.kubernetes.io/instance']"
|
||||
globalOutputRefs:
|
||||
- "default"
|
||||
|
||||
# useful on elastic e.g. with dedot
|
||||
globalFilters:
|
||||
- prometheus:
|
||||
metrics:
|
||||
- name: "logs_all_count"
|
||||
desc: "The total number of messages in namespace"
|
||||
type: "counter"
|
||||
labels:
|
||||
exported_namespace: "$.kubernetes.namespace_name"
|
||||
exported_pod: "$.kubernetes.pod_name"
|
||||
exported_container: "$.kubernetes.container_name"
|
||||
image: "$.kubernetes.container_image"
|
||||
app_kubernetes_io_name: "$['kubernetes']['labels']['app.kubernetes.io/name']"
|
||||
app_kubernetes_io_instance: "$['kubernetes']['labels']['app.kubernetes.io/instance']"
|
||||
|
||||
# deploy a clusteroutput (which all flows can use)
|
||||
clusterOutputs:
|
||||
- name: "default"
|
||||
spec:
|
||||
{{- if .Values.loki.enabled }}
|
||||
# for loki:
|
||||
# https://kube-logging.dev/docs/configuration/plugins/outputs/loki/
|
||||
loki:
|
||||
url: http://loki:3100
|
||||
buffer:
|
||||
timekey: 1m
|
||||
timekey_wait: 30s
|
||||
timekey_use_utc: true
|
||||
# do not use configure_kubernetes_labels strip other kubernetes labels
|
||||
extract_kubernetes_labels: true
|
||||
labels:
|
||||
# from configure_kubernetes_labels reimplement
|
||||
host: $.kubernetes.host
|
||||
namespace: $.kubernetes.namespace_name
|
||||
pod: $.kubernetes.pod_name
|
||||
pod_id: $.kubernetes.pod_id
|
||||
container: $.kubernetes.container_name
|
||||
container_id: $.kubernetes.docker_id
|
||||
{{- else }}
|
||||
nullout: {}
|
||||
{{- end }}
|
||||
|
||||
# add some useful default clusterFlows
|
||||
clusterFlows:
|
||||
# parse all data with logfmt of pod which contain label: kube_logging_parser=logfmt (and send to ClusterOutput default)
|
||||
- name: logfmt
|
||||
spec:
|
||||
filters:
|
||||
- parser:
|
||||
reserve_data: true
|
||||
remove_key_name_field: true
|
||||
hash_value_field: "logfmt"
|
||||
parse:
|
||||
type: "multi_format"
|
||||
patterns:
|
||||
- format: "logfmt"
|
||||
# fallback, just keep data unparsed
|
||||
- format: "none"
|
||||
match:
|
||||
- select:
|
||||
labels:
|
||||
"kube_logging_parser": "logfmt"
|
||||
globalOutputRefs:
|
||||
- "default"
|
||||
# parse all data with json of pod which contain label: kube_logging_parser=json (and send to ClusterOutput default)
|
||||
- name: json
|
||||
spec:
|
||||
filters:
|
||||
- parser:
|
||||
reserve_data: true
|
||||
remove_key_name_field: true
|
||||
hash_value_field: "json"
|
||||
parse:
|
||||
type: "multi_format"
|
||||
patterns:
|
||||
- format: "json"
|
||||
# fallback, just keep data unparsed
|
||||
- format: "none"
|
||||
match:
|
||||
- select:
|
||||
labels:
|
||||
"kube_logging_parser": "json"
|
||||
globalOutputRefs:
|
||||
- "default"
|
|
@ -66,8 +66,8 @@ spec:
|
|||
enabled: false
|
||||
grafanaAgent:
|
||||
installOperator: false
|
||||
lokiCanary:
|
||||
enabled: false
|
||||
lokiCanary:
|
||||
enabled: false
|
||||
test:
|
||||
enabled: false
|
||||
gateway:
|
||||
|
|
6
infra-logging/templates/vector/configmap.yaml
Normal file
6
infra-logging/templates/vector/configmap.yaml
Normal file
|
@ -0,0 +1,6 @@
|
|||
---
# ConfigMap named "vector" that carries the Vector configuration.
# The chart file config/vector.yaml is rendered with `tpl` against the root
# context and stored as a single quoted string under the key vector.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: vector
data:
  vector.yaml: {{ tpl (.Files.Get "config/vector.yaml") . | quote }}
|
42
infra-logging/templates/vector/release.yaml
Normal file
42
infra-logging/templates/vector/release.yaml
Normal file
|
@ -0,0 +1,42 @@
|
|||
---
# Flux HelmRelease that installs the "vector" chart from the HelmRepository
# named "vector", using role Agent.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: vector-agent
spec:
  chart:
    spec:
      sourceRef:
        kind: HelmRepository
        name: vector
      chart: vector
  # install/test/upgrade/driftDetection settings come from the shared
  # commons.helm.release values.
  install:
    {{- toYaml .Values.commons.helm.release.install | nindent 4 }}
  test:
    {{- toYaml .Values.commons.helm.release.test | nindent 4 }}
  upgrade:
    {{- toYaml .Values.commons.helm.release.upgrade | nindent 4 }}
  driftDetection:
    {{- toYaml .Values.commons.helm.release.driftDetection | nindent 4 }}
  interval: 10m
  values:
    role: Agent
    # maxSurge is a rollingUpdate setting; a bare updateStrategy.maxSurge is
    # not a field of the workload's update strategy. maxUnavailable has to be
    # 0 whenever maxSurge is non-zero.
    updateStrategy:
      type: RollingUpdate
      rollingUpdate:
        maxSurge: 1
        maxUnavailable: 0
    # Must match data_dir in config/vector.yaml.
    dataDir: "/vector-data-dir"
    logLevel: "warn"
    # Use the ConfigMap "vector" instead of a chart-generated configuration.
    existingConfigMaps:
      - vector
    # Port 9090 is the prometheus_exporter sink address in config/vector.yaml.
    containerPorts:
      - name: prom-exporter
        containerPort: 9090
        protocol: TCP
    service:
      ports:
        - name: prom-exporter
          port: 9090
          protocol: TCP
    # Only enable the PodMonitor when the cluster serves that API.
    podMonitor:
      enabled: {{ (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PodMonitor") }}
      additionalLabels:
        {{- toYaml .Values.commons.prometheus.monitor.labels | nindent 10 }}
|
|
@ -2,8 +2,7 @@
|
|||
apiVersion: source.toolkit.fluxcd.io/v1
|
||||
kind: HelmRepository
|
||||
metadata:
|
||||
name: kube-logging
|
||||
name: vector
|
||||
spec:
|
||||
url: oci://ghcr.io/kube-logging/helm-charts
|
||||
type: oci
|
||||
url: https://helm.vector.dev
|
||||
interval: 10m
|
|
@ -45,8 +45,5 @@ grafana:
|
|||
# -- annotations of grafana dashboard configmap
|
||||
annotations: {}
|
||||
|
||||
fluentd:
|
||||
replicas: 1
|
||||
|
||||
loki:
|
||||
enabled: true
|
||||
|
|
Loading…
Add table
Reference in a new issue