frezes
感谢大佬回复!执行这个命令后,返回下面的内容。看起来是突然收到了 "Received SIGTERM, exiting gracefully..." 关闭信号,然后就重启了。是因为达到了 resource limitation(资源限制)吗?
ts=2022-09-19T07:10:11.598Z caller=main.go:516 level=info msg="Starting Prometheus" version="(version=2.34.0, branch=HEAD, revision=881111fec4332c33094a6fb2680c71fffc427275)"
ts=2022-09-19T07:10:11.598Z caller=main.go:521 level=info build_context="(go=go1.17.8, user=root@121ad7ea5487, date=20220315-15:18:00)"
ts=2022-09-19T07:10:11.598Z caller=main.go:522 level=info host_details="(Linux 5.4.209-116.363.amzn2.x86_64 #1 SMP Wed Aug 10 21:19:18 UTC 2022 x86_64 prometheus-k8s-0 (none))"
ts=2022-09-19T07:10:11.598Z caller=main.go:523 level=info fd_limits="(soft=1048576, hard=1048576)"
ts=2022-09-19T07:10:11.598Z caller=main.go:524 level=info vm_limits="(soft=unlimited, hard=unlimited)"
ts=2022-09-19T07:10:17.337Z caller=web.go:540 level=info component=web msg="Start listening for connections" address=0.0.0.0:9090
ts=2022-09-19T07:10:17.338Z caller=main.go:937 level=info msg="Starting TSDB ..."
ts=2022-09-19T07:10:17.339Z caller=tls_config.go:231 level=info component=web msg="TLS is disabled." http2=false
ts=2022-09-19T07:10:17.345Z caller=repair.go:57 level=info component=tsdb msg="Found healthy block" mint=1663557782897 maxt=1663560000000 ulid=01GDA6M688YJB2SZW17YV42VR9
ts=2022-09-19T07:10:20.851Z caller=head.go:493 level=info component=tsdb msg="Replaying on-disk memory mappable chunks if any"
ts=2022-09-19T07:10:20.927Z caller=head.go:536 level=info component=tsdb msg="On-disk memory mappable chunks replay completed" duration=75.723739ms
ts=2022-09-19T07:10:20.927Z caller=head.go:542 level=info component=tsdb msg="Replaying WAL, this may take a while"
ts=2022-09-19T07:10:26.115Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=0 maxSegment=9
ts=2022-09-19T07:10:27.665Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=1 maxSegment=9
ts=2022-09-19T07:10:28.102Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=2 maxSegment=9
ts=2022-09-19T07:10:28.418Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=3 maxSegment=9
ts=2022-09-19T07:10:28.691Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=4 maxSegment=9
ts=2022-09-19T07:10:28.702Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=5 maxSegment=9
ts=2022-09-19T07:10:28.706Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=6 maxSegment=9
ts=2022-09-19T07:10:28.715Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=7 maxSegment=9
ts=2022-09-19T07:10:28.790Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=8 maxSegment=9
ts=2022-09-19T07:10:28.792Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=9 maxSegment=9
ts=2022-09-19T07:10:28.792Z caller=head.go:619 level=info component=tsdb msg="WAL replay completed" checkpoint_replay_duration=2.339816ms wal_replay_duration=7.862657848s total_replay_duration=7.940790025s
ts=2022-09-19T07:10:28.935Z caller=main.go:956 level=warn fs_type=NFS_SUPER_MAGIC msg="This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems."
ts=2022-09-19T07:10:28.935Z caller=main.go:961 level=info msg="TSDB started"
ts=2022-09-19T07:10:28.935Z caller=main.go:1142 level=info msg="Loading configuration file" filename=/etc/prometheus/config_out/prometheus.env.yaml
ts=2022-09-19T07:10:28.942Z caller=kubernetes.go:313 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-19T07:10:28.943Z caller=kubernetes.go:313 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-19T07:10:28.943Z caller=kubernetes.go:313 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-19T07:10:28.943Z caller=kubernetes.go:313 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-19T07:10:28.944Z caller=kubernetes.go:313 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-19T07:10:28.944Z caller=kubernetes.go:313 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-19T07:10:28.945Z caller=kubernetes.go:313 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-19T07:10:28.945Z caller=kubernetes.go:313 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-19T07:10:28.946Z caller=kubernetes.go:313 level=info component="discovery manager notify" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-19T07:10:28.983Z caller=main.go:1179 level=info msg="Completed loading of configuration file" filename=/etc/prometheus/config_out/prometheus.env.yaml totalDuration=47.809688ms db_storage=1.1µs remote_storage=1.56µs web_handler=800ns query_engine=1.13µs scrape=186.416µs scrape_sd=3.349854ms notify=21.081µs notify_sd=588.296µs rules=37.154109ms tracing=5.58µs
ts=2022-09-19T07:10:28.983Z caller=main.go:910 level=info msg="Server is ready to receive web requests."
ts=2022-09-19T07:13:14.165Z caller=compact.go:519 level=info component=tsdb msg="write block" mint=1663560000000 maxt=1663567200000 ulid=01GDA9B1W61WZ1TY3GRGPJ6M9E duration=2m44.911485767s
ts=2022-09-19T07:14:06.482Z caller=head.go:840 level=info component=tsdb msg="Head GC completed" duration=1.402996698s
ts=2022-09-19T07:14:07.828Z caller=checkpoint.go:98 level=info component=tsdb msg="Creating checkpoint" from_segment=0 to_segment=5 mint=1663567200000
ts=2022-09-19T07:16:48.126Z caller=main.go:776 level=warn msg="Received SIGTERM, exiting gracefully..."
ts=2022-09-19T07:16:48.126Z caller=main.go:799 level=info msg="Stopping scrape discovery manager..."
ts=2022-09-19T07:16:48.126Z caller=main.go:813 level=info msg="Stopping notify discovery manager..."
ts=2022-09-19T07:16:48.126Z caller=main.go:835 level=info msg="Stopping scrape manager..."
ts=2022-09-19T07:16:48.126Z caller=manager.go:610 level=warn component="rule manager" group=kubernetes-system-controller-manager msg="Evaluating rule failed" rule="alert: KubeControllerManagerDown\nexpr: absent(up{job=\"kube-controller-manager\"} == 1)\nfor: 15m\nlabels:\n severity: critical\nannotations:\n description: KubeControllerManager has disappeared from Prometheus target discovery.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontrollermanagerdown\n summary: Target disappeared from Prometheus target discovery.\n" err="query timed out in expression evaluation"
ts=2022-09-19T07:16:48.127Z caller=main.go:795 level=info msg="Scrape discovery manager stopped"
ts=2022-09-19T07:16:48.129Z caller=manager.go:610 level=warn component="rule manager" group=prometheus.rules msg="Evaluating rule failed" rule="record: prometheus:up:sum\nexpr: sum(up{job=\"prometheus-k8s\",namespace=\"kubesphere-monitoring-system\"} == 1)\n" err="query timed out in query execution"
ts=2022-09-19T07:16:48.128Z caller=manager.go:610 level=warn component="rule manager" group=kubernetes-apps msg="Evaluating rule failed" rule="alert: KubePodCrashLooping\nexpr: max_over_time(kube_pod_container_status_waiting_reason{job=\"kube-state-metrics\",reason=\"CrashLoopBackOff\"}[5m])\n >= 1\nfor: 15m\nlabels:\n severity: warning\nannotations:\n description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container\n }}) is in waiting state (reason: \"CrashLoopBackOff\").'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping\n summary: Pod is crash looping.\n" err="query timed out in query execution"
ts=2022-09-19T07:16:48.129Z caller=manager.go:610 level=warn component="rule manager" group=node.rules msg="Evaluating rule failed" rule="record: node_cpu_used_seconds_total\nexpr: sum by(cpu, instance, job, namespace, pod) (node_cpu_seconds_total{job=\"node-exporter\",mode=~\"user|nice|system|iowait|irq|softirq\"})\n" err="query timed out in query execution"
ts=2022-09-19T07:16:48.129Z caller=manager.go:610 level=warn component="rule manager" group=kube-apiserver-burnrate.rules msg="Evaluating rule failed" rule="record: apiserver_request:burnrate1d\nexpr: ((sum by(cluster) (rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n - ((sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"1\",scope=~\"resource|\",verb=~\"LIST|GET\"}[1d]))\n or vector(0)) + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"5\",scope=\"namespace\",verb=~\"LIST|GET\"}[1d]))\n + sum by(cluster) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"30\",scope=\"cluster\",verb=~\"LIST|GET\"}[1d]))))\n + sum by(cluster) (rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"LIST|GET\"}[1d])))\n / sum by(cluster) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\nlabels:\n verb: read\n" err="query timed out in expression evaluation"
ts=2022-09-19T07:16:48.130Z caller=klog.go:116 level=error component=k8s_client_runtime func=ErrorDepth msg="pkg/mod/k8s.io/client-go@v0.22.7/tools/cache/reflector.go:167: Failed to watch *v1.Service: Get \"https://10.10.0.1:443/api/v1/namespaces/default/services?allowWatchBookmarks=true&resourceVersion=6729612&timeout=8m41s&timeoutSeconds=521&watch=true\": context canceled"
ts=2022-09-19T07:16:48.130Z caller=main.go:809 level=info msg="Notify discovery manager stopped"
ts=2022-09-19T07:16:48.635Z caller=manager.go:946 level=info component="rule manager" msg="Stopping rule manager..."
ts=2022-09-19T07:16:48.635Z caller=main.go:829 level=info msg="Scrape manager stopped"
ts=2022-09-19T07:16:48.692Z caller=manager.go:956 level=info component="rule manager" msg="Rule manager stopped"
ts=2022-09-19T07:17:20.371Z caller=head.go:1009 level=info component=tsdb msg="WAL checkpoint complete" first=0 last=5 duration=3m13.858733378s
ts=2022-09-19T07:17:20.408Z caller=notifier.go:600 level=info component=notifier msg="Stopping notification manager..."
ts=2022-09-19T07:17:20.408Z caller=main.go:1068 level=info msg="Notifier manager stopped"
ts=2022-09-19T07:17:20.408Z caller=main.go:1080 level=info msg="See you next time!"