hrlc

Recreate the ks-installer Pod. Its logs show requests to etcd timing out; recreating the Pod makes the deployment script run again.
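For reference, a minimal sketch of that with kubectl, assuming the default kubesphere-system namespace and the app=ks-installer label used by the stock installer manifests:

# Delete the ks-installer Pod; its Deployment recreates it and the installation playbooks rerun
kubectl -n kubesphere-system delete pod -l app=ks-installer

# Follow the logs of the new Pod to watch the rerun
kubectl -n kubesphere-system logs -l app=ks-installer -f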

  • hrlc replied to this post

    frezes Thanks, that solved it, but the monitoring stage failed at the very end of the installation:

    Waiting for all tasks to be completed ...

    task openpitrix status is successful (1/4)

    task network status is successful (2/4)

    task multicluster status is successful (3/4)

    task monitoring status is failed (4/4)

    **************************************************

    Collecting installation results ...

    Task 'monitoring' failed:

    ******************************************************************************************************

    {
      "counter": 117,
      "created": "2023-02-03T13:49:24.655686",
      "end_line": 112,
      "event": "runner_on_failed",
      "event_data": {
        "duration": 53.624667,
        "end": "2023-02-03T13:49:24.655533",
        "event_loop": null,
        "host": "localhost",
        "ignore_errors": null,
        "play": "localhost",
        "play_pattern": "localhost",
        "play_uuid": "8219b820-73af-ffef-3dd1-000000000005",
        "playbook": "/kubesphere/playbooks/monitoring.yaml",
        "playbook_uuid": "d6a39677-0904-4bfa-aa9a-c2301c5131ac",
        "remote_addr": "127.0.0.1",
        "res": {
          "changed": true,
          "msg": "All items completed",
          "results": [
            {
              "_ansible_item_label": "prometheus",
              "_ansible_no_log": false,
              "ansible_loop_var": "item",
              "attempts": 5,
              "changed": true,
              "cmd": "/usr/local/bin/kubectl apply -f /kubesphere/kubesphere/prometheus/prometheus",
              "delta": "0:00:00.963054",
              "end": "2023-02-03 21:49:00.006501",
              "failed": true,
              "failed_when_result": true,
              "invocation": {
                "module_args": {
                  "_raw_params": "/usr/local/bin/kubectl apply -f /kubesphere/kubesphere/prometheus/prometheus",
                  "_uses_shell": true,
                  "argv": null,
                  "chdir": null,
                  "creates": null,
                  "executable": null,
                  "removes": null,
                  "stdin": null,
                  "stdin_add_newline": true,
                  "strip_empty_ends": true,
                  "warn": true
                }
              },
              "item": "prometheus",
              "msg": "non-zero return code",
              "rc": 1,
              "start": "2023-02-03 21:48:59.043447",
              "stderr": "error: unable to recognize \"/kubesphere/kubesphere/prometheus/prometheus/prometheus-podDisruptionBudget.yaml\": no matches for kind \"PodDisruptionBudget\" in version \"policy/v1beta1\"",
              "stderr_lines": [
                "error: unable to recognize \"/kubesphere/kubesphere/prometheus/prometheus/prometheus-podDisruptionBudget.yaml\": no matches for kind \"PodDisruptionBudget\" in version \"policy/v1beta1\""
              ],
              "stdout": "clusterrole.rbac.authorization.k8s.io/kubesphere-prometheus-k8s unchanged\nclusterrolebinding.rbac.authorization.k8s.io/kubesphere-prometheus-k8s unchanged\nprometheus.monitoring.coreos.com/k8s unchanged\nprometheusrule.monitoring.coreos.com/prometheus-k8s-prometheus-rules unchanged\nrolebinding.rbac.authorization.k8s.io/prometheus-k8s-config unchanged\nrole.rbac.authorization.k8s.io/prometheus-k8s-config unchanged\nservice/prometheus-k8s unchanged\nserviceaccount/prometheus-k8s unchanged\nservicemonitor.monitoring.coreos.com/prometheus-k8s unchanged",
              "stdout_lines": [
                "clusterrole.rbac.authorization.k8s.io/kubesphere-prometheus-k8s unchanged",
                "clusterrolebinding.rbac.authorization.k8s.io/kubesphere-prometheus-k8s unchanged",
                "prometheus.monitoring.coreos.com/k8s unchanged",
                "prometheusrule.monitoring.coreos.com/prometheus-k8s-prometheus-rules unchanged",
                "rolebinding.rbac.authorization.k8s.io/prometheus-k8s-config unchanged",
                "role.rbac.authorization.k8s.io/prometheus-k8s-config unchanged",
                "service/prometheus-k8s unchanged",
                "serviceaccount/prometheus-k8s unchanged",
                "servicemonitor.monitoring.coreos.com/prometheus-k8s unchanged"
              ]
            },
            {
              "_ansible_item_label": "prometheus",
              "_ansible_no_log": false,
              "ansible_loop_var": "item",
              "attempts": 5,
              "changed": true,
              "cmd": "/usr/local/bin/kubectl apply -f /kubesphere/kubesphere/prometheus/prometheus",
              "delta": "0:00:01.147543",
              "end": "2023-02-03 21:49:24.582794",
              "failed": true,
              "failed_when_result": true,
              "invocation": {
                "module_args": {
                  "_raw_params": "/usr/local/bin/kubectl apply -f /kubesphere/kubesphere/prometheus/prometheus",
                  "_uses_shell": true,
                  "argv": null,
                  "chdir": null,
                  "creates": null,
                  "executable": null,
                  "removes": null,
                  "stdin": null,
                  "stdin_add_newline": true,
                  "strip_empty_ends": true,
                  "warn": true
                }
              },
              "item": "prometheus",
              "msg": "non-zero return code",
              "rc": 1,
              "start": "2023-02-03 21:49:23.435251",
              "stderr": "error: unable to recognize \"/kubesphere/kubesphere/prometheus/prometheus/prometheus-podDisruptionBudget.yaml\": no matches for kind \"PodDisruptionBudget\" in version \"policy/v1beta1\"",
              "stderr_lines": [
                "error: unable to recognize \"/kubesphere/kubesphere/prometheus/prometheus/prometheus-podDisruptionBudget.yaml\": no matches for kind \"PodDisruptionBudget\" in version \"policy/v1beta1\""
              ],
              "stdout": "clusterrole.rbac.authorization.k8s.io/kubesphere-prometheus-k8s unchanged\nclusterrolebinding.rbac.authorization.k8s.io/kubesphere-prometheus-k8s unchanged\nprometheus.monitoring.coreos.com/k8s unchanged\nprometheusrule.monitoring.coreos.com/prometheus-k8s-prometheus-rules unchanged\nrolebinding.rbac.authorization.k8s.io/prometheus-k8s-config unchanged\nrole.rbac.authorization.k8s.io/prometheus-k8s-config unchanged\nservice/prometheus-k8s unchanged\nserviceaccount/prometheus-k8s unchanged\nservicemonitor.monitoring.coreos.com/prometheus-k8s unchanged",
              "stdout_lines": [
                "clusterrole.rbac.authorization.k8s.io/kubesphere-prometheus-k8s unchanged",
                "clusterrolebinding.rbac.authorization.k8s.io/kubesphere-prometheus-k8s unchanged",
                "prometheus.monitoring.coreos.com/k8s unchanged",
                "prometheusrule.monitoring.coreos.com/prometheus-k8s-prometheus-rules unchanged",
                "rolebinding.rbac.authorization.k8s.io/prometheus-k8s-config unchanged",
                "role.rbac.authorization.k8s.io/prometheus-k8s-config unchanged",
                "service/prometheus-k8s unchanged",
                "serviceaccount/prometheus-k8s unchanged",
                "servicemonitor.monitoring.coreos.com/prometheus-k8s unchanged"
              ]
            }
          ]
        },
        "resolved_action": "shell",
        "role": "ks-monitor",
        "start": "2023-02-03T13:48:31.030866",
        "task": "Monitoring | Installing Prometheus",
        "task_action": "shell",
        "task_args": "",
        "task_path": "/kubesphere/installer/roles/ks-monitor/tasks/prometheus.yaml:2",
        "task_uuid": "8219b820-73af-ffef-3dd1-000000000042",
        "uuid": "d05cb401-8641-495a-88d2-94ab5a5ae0c0"
      },
      "parent_uuid": "8219b820-73af-ffef-3dd1-000000000042",
      "pid": 2901,
      "runner_ident": "monitoring",
      "start_line": 112,
      "stdout": "",
      "uuid": "d05cb401-8641-495a-88d2-94ab5a5ae0c0"
    }

    ************************************************************************************

    Also, the monitoring Pods themselves are running fine. Is this a version problem?

    kubesphere-monitoring-system prometheus-k8s-0 2/2 Running 0 30m 10.0.3.123 k8s-admin-02 <none> <none>

    kubesphere-monitoring-system prometheus-k8s-1 2/2 Running 0 30m 10.0.0.195 k8s-admin-03 <none> <none>

    kubesphere-monitoring-system prometheus-operator-845b8fb9df-g7jp2 2/2 Running 0 31m 10.0.3.247 k8s-admin-02 <none> <none>
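    As a quick cross-check that only the PodDisruptionBudget objects are missing while the rest of the stack deployed, something like the following should do (plain kubectl, no assumptions beyond cluster access):

    # The prometheus-k8s PodDisruptionBudget should be absent, since applying its manifest failed
    kubectl -n kubesphere-monitoring-system get poddisruptionbudgets
    kubectl -n kubesphere-monitoring-system get pods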

    • hrlc replied to this post

      hrlc

      Installing KubeSphere v3.3.2 on the latest Kubernetes 1.26? The community has not yet verified deployment on the latest Kubernetes release, so you are a brave early adopter!

      Judging from the error in the log, no matches for kind "PodDisruptionBudget" in version "policy/v1beta1", the deployment failure is caused by the Kubernetes API changes (policy/v1beta1 has been removed). You are welcome to open an issue for this at https://github.com/kubesphere/kubesphere/issues.
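      To confirm that on the cluster, one can check which versions of the policy API group the API server still serves; on Kubernetes 1.25 and later only policy/v1 remains (standard kubectl commands):

      # Lists the served policy versions; policy/v1beta1 no longer appears on 1.25+
      kubectl api-versions | grep '^policy/'

      # Shows the API version kubectl resolves for PodDisruptionBudget
      kubectl api-resources | grep -i poddisruptionbudget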

      • hrlc replied to this post

        hrlc
        Actually, you can leave this alone for now; we plan to support Kubernetes v1.26 as well.

        Now let's fix it. The log fails at this step, so we only need to complete the few steps that follow it:
        https://github.com/kubesphere/ks-installer/blob/master/roles/ks-monitor/tasks/prometheus-stack.yaml#L24
        First, exec into the ks-installer container:
        kubectl exec -it -n kubesphere-monitoring-system ks-installer-xxxxx-xxx sh

        ~ $ cd  /kubesphere/kubesphere/prometheus
        /kubesphere/kubesphere/prometheus $ ls
        alertmanager         etcd                 grafana              kube-prometheus      kube-state-metrics   kubernetes           kubesphere           node-exporter        prometheus           prometheus-operator  thanos-ruler
        
        /kubesphere/kubesphere/prometheus $ kubectl apply -f kubernetes/ --force
        /kubesphere/kubesphere/prometheus $ kubectl apply -f kubesphere/
        /kubesphere/kubesphere/prometheus $ kubectl apply -f alertmanager/
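        Since the bundled manifest under prometheus/ still uses policy/v1beta1, applying that directory as-is will keep failing. A hedged sketch of one way to patch it from inside the same container before re-applying (the file path is the one reported in the error above; policy/v1 is the PodDisruptionBudget version served by current clusters):

        # Rewrite the PodDisruptionBudget manifest to the policy/v1 API, then re-apply the directory
        sed -i 's#policy/v1beta1#policy/v1#g' /kubesphere/kubesphere/prometheus/prometheus/prometheus-podDisruptionBudget.yaml
        kubectl apply -f /kubesphere/kubesphere/prometheus/prometheus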

          frezes
          There is one more component, notification-manager, which is used for alert notifications. That one is a bit of work to fix, so we will release a new version for it later. It does not affect the monitoring features.

          frezes

          There is no kubesphere directory under that path; is it safe to just skip that step?

            hrlc
            That's fine, just skip it.
            Check the console again; monitoring should be back to normal now.

            • hrlc replied to this post

              frezes Monitoring is working now. There is still one component with a version mismatch, though.

              Is the exception below the notification-manager issue you mentioned earlier?

                hrlc
                That one is also fine. It is the same as the error above, caused by the Kubernetes API upgrade, and will be fixed in a later release.

                  3 months later

                  kilvn The manual apply works for the moment, but everything reverts once Docker is restarted, so it does not really solve the problem.

                    2 months later

                    kilvn

                    Write a Dockerfile:

                    FROM registry.cn-beijing.aliyuncs.com/kubesphereio/ks-installer:v3.3.2

                    USER root

                    RUN sed -i 's#policy/v1beta1#policy/v1#g' installer/roles/common/files/redis-ha/templates/redis-ha-pdb.yaml \
                     && sed -i 's#policy/v1beta1#policy/v1#g' installer/roles/gatekeeper/files/gatekeeper/templates/gatekeeper-admin-podsecuritypolicy.yaml \
                     && sed -i 's#policy/v1beta1#policy/v1#g' installer/roles/gatekeeper/files/gatekeeper/templates/gatekeeper-controller-manager-poddisruptionbudget.yaml \
                     && sed -i 's#policy/v1beta1#policy/v1#g' installer/roles/ks-monitor/files/prometheus/alertmanager/alertmanager-podDisruptionBudget.yaml \
                     && sed -i 's#policy/v1beta1#policy/v1#g' installer/roles/ks-monitor/files/prometheus/prometheus/prometheus-podDisruptionBudget.yaml \
                     && sed -i 's#policy/v1beta1#policy/v1#g' installer/roles/ks-monitor/files/prometheus/thanos-ruler/thanos-ruler-podDisruptionBudget.yaml

                    USER kubesphere

                    docker build -t xxx/kubesphereio/ks-installer:v3.3.2-v1 .

                    Rebuild the image, then point ks-installer at your own image instead, and the installation goes through.
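                    One way to roll the rebuilt image out to an existing cluster, sketched under the assumption that the ks-installer Deployment sits in the kubesphere-system namespace and its container is named installer (adjust both to match your cluster):

                    # Push the patched image somewhere the cluster can pull from
                    docker push xxx/kubesphereio/ks-installer:v3.3.2-v1

                    # Point the installer Deployment at it; the replacement Pod reruns the playbooks
                    kubectl -n kubesphere-system set image deployment/ks-installer installer=xxx/kubesphereio/ks-installer:v3.3.2-v1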