• 告警通知
  • 配置钉钉预警后,预警已触发,但是钉钉里面没收到通知

wanjunlei 在页面配置的webhook等一部分关键信息,在这里面看是没有的,变成空了

[root@node118 kubekey]# kubectl get nr global-dingtalk-receiver -oyaml
apiVersion: notification.kubesphere.io/v2beta2
kind: Receiver
metadata:
  annotations:
    kubesphere.io/creator: admin
  creationTimestamp: "2023-08-08T08:13:17Z"
  generation: 1
  labels:
    type: global
  name: global-dingtalk-receiver
  resourceVersion: "51748950"
  uid: 58cd94bd-fb75-464b-8496-037d63d5ebe4
spec:
  dingtalk:
    chatbot:
      keywords:
      - alert
      - k8s
      webhook:
        valueFrom:
          secretKeyRef:
            key: ""
            name: ""
    enabled: true
status: {}
[root@node118 kubekey]#
[root@node118 kubekey]# kubectl get nc default-dingtalk-config -oyaml
Error from server (NotFound): configs.notification.kubesphere.io "default-dingtalk-config" not found
[root@node118 kubekey]#
[root@node118 kubekey]#
[root@node118 kubekey]# kubectl get nm -oyaml
apiVersion: v1
items:
- apiVersion: notification.kubesphere.io/v2beta2
  kind: NotificationManager
  metadata:
    annotations:
      kubectl.kubernetes.io/last-applied-configuration: |
        {"apiVersion":"notification.kubesphere.io/v2beta2","kind":"NotificationManager","metadata":{"annotations":{},"name":"notification-manager"},"spec":{"defaultConfigSelector":{"matchLabels":{"type":"default"}},"groupLabels":["alertname","namespace"],"image":"kubesphere/notification-manager:v2.2.0","imagePullPolicy":"Always","portName":"webhook","receivers":{"globalReceiverSelector":{"matchLabels":{"type":"global"}},"options":{"dingtalk":{"notificationTimeout":5},"email":{"notificationTimeout":5},"slack":{"notificationTimeout":5},"webhook":{"notificationTimeout":5},"wechat":{"notificationTimeout":5}},"tenantKey":"user","tenantReceiverSelector":{"matchLabels":{"type":"tenant"}}},"replicas":1,"resources":{"limits":{"cpu":"500m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"20Mi"}},"serviceAccountName":"notification-manager-sa","template":{"text":{"name":"notification-manager-template","namespace":"kubesphere-monitoring-system"}}}}
    creationTimestamp: "2023-08-08T05:49:27Z"
    generation: 1
    name: notification-manager
    resourceVersion: "51722951"
    uid: 57d127b9-e4e6-4738-b02d-bae024966f15
  spec:
    batchMaxSize: 100
    batchMaxWait: 1m
    defaultConfigSelector:
      matchLabels:
        type: default
    image: kubesphere/notification-manager:v2.2.0
    imagePullPolicy: Always
    portName: webhook
    receivers:
      globalReceiverSelector:
        matchLabels:
          type: global
      options:
        dingtalk:
          notificationTimeout: 5
        email:
          notificationTimeout: 5
        slack:
          notificationTimeout: 5
        webhook:
          notificationTimeout: 5
        wechat:
          notificationTimeout: 5
      tenantKey: user
      tenantReceiverSelector:
        matchLabels:
          type: tenant
    replicas: 1
    resources:
      limits:
        cpu: 500m
        memory: 1Gi
      requests:
        cpu: 100m
        memory: 20Mi
    routePolicy: All
    serviceAccountName: notification-manager-sa
kind: List
metadata:
  resourceVersion: ""
  selfLink: ""
[root@node118 kubekey]#
[root@node118 kubekey]#

你这个集群是怎么部署的?感觉不太对,不行你把notification manager的 helm 卸载了,然后重新运行下 installer

    我们配了webhook-url的,但是从上面的yaml中看, 引用的是""

    如果直接kubectl edit进行设置的话, 钉钉可以收到预警信息的。这时,如果再去【钉钉机器人设置】界面上编辑一下信息,点击确认,这个yaml中 webhook的值引用又变成空了

    不太熟悉k8s相关指令, 请问指的是这样操作吗:

    helm uninstall notification-manager   -n kubesphere-monitoring-system
    helm install notification-manager   -n kubesphere-monitoring-system

    你把这个 yaml apply 一下,然后重新设置下通知试试

    apiVersion: notification.kubesphere.io/v2beta2
    kind: NotificationManager
    metadata:
      annotations:
        meta.helm.sh/release-name: notification-manager
        meta.helm.sh/release-namespace: kubesphere-monitoring-system
      labels:
        app: notification-manager
        app.kubernetes.io/managed-by: Helm
      name: notification-manager
    spec:
      affinity: {}
      batchMaxSize: 100
      batchMaxWait: 1m
      defaultConfigSelector:
        matchLabels:
          type: default
      defaultSecretNamespace: kubesphere-monitoring-federated
      groupLabels:
      - alertname
      - namespace
      image: kubesphere/notification-manager:v2.3.0
      imagePullPolicy: IfNotPresent
      nodeSelector: {}
      portName: webhook
      receivers:
        globalReceiverSelector:
          matchLabels:
            type: global
        options:
          email:
            deliveryType: bulk
            notificationTimeout: 5
          slack:
            notificationTimeout: 5
          wechat:
            notificationTimeout: 5
        tenantKey: user
        tenantReceiverSelector:
          matchLabels:
            type: tenant
      replicas: 1
      resources:
        limits:
          cpu: 500m
          memory: 500Mi
        requests:
          cpu: 5m
          memory: 20Mi
      routePolicy: All
      serviceAccountName: notification-manager-sa
      sidecars:
        tenant:
          image: kubesphere/notification-tenant-sidecar:v3.2.0
          name: tenant
          type: kubesphere
      template:
        language: English
        languagePack:
        - name: zh-cn
          namespace: kubesphere-monitoring-system
        reloadCycle: 1m
        text:
          name: notification-manager-template
          namespace: kubesphere-monitoring-system
      tolerations: []
      volumeMounts: []
      volumes: []

    还是没有效果噢

    我这边的具体测试步骤为:

    a) kubectl delete -f 原来的notification-manager.yaml

    b) kubectl apply -f 新的notification-manager.yaml

    c) 再去【钉钉机器人设置】,简单编辑了下关键字,点击确定保存

    d) 再查看global-dingtalk-receiver,发现webhook的引用还是为空

    [root@node118 kubekey]# kubectl get receivers global-dingtalk-receiver -o yaml
    apiVersion: notification.kubesphere.io/v2beta2
    kind: Receiver
    metadata:
      creationTimestamp: "2023-08-08T08:13:17Z"
      generation: 2
      labels:
        type: global
      name: global-dingtalk-receiver
      resourceVersion: "51761332"
      uid: 58cd94bd-fb75-464b-8496-037d63d5ebe4
    spec:
      dingtalk:
        chatbot:
          keywords:
          - alert
          - k8s
          - xx
          webhook:
            valueFrom:
              secretKeyRef:
                key: ""
                name: ""
        enabled: true
    status: {}
    [root@node118 kubekey]#

    wanjunlei 现在执行这三个指令的话,是这样的

    kubectl get nr global-dingtalk-receiver -oyaml

    kubectl get nc default-dingtalk-config -oyaml

    kubectl get nm -oyaml

    [root@node118 kubekey]# kubectl get nr global-dingtalk-receiver -oyaml
    apiVersion: notification.kubesphere.io/v2beta2
    kind: Receiver
    metadata:
      creationTimestamp: "2023-08-08T08:13:17Z"
      generation: 3
      labels:
        type: global
      name: global-dingtalk-receiver
      resourceVersion: "51765178"
      uid: 58cd94bd-fb75-464b-8496-037d63d5ebe4
    spec:
      dingtalk:
        chatbot:
          keywords:
          - alert
          - k8s
          webhook:
            valueFrom:
              secretKeyRef:
                key: ""
                name: ""
        enabled: true
    status: {}
    [root@node118 kubekey]#
    [root@node118 kubekey]#
    [root@node118 kubekey]#
    [root@node118 kubekey]# kubectl get nc default-dingtalk-config -oyaml
    apiVersion: notification.kubesphere.io/v2beta2
    kind: Config
    metadata:
      annotations:
        kubesphere.io/creator: admin
      creationTimestamp: "2023-08-08T09:19:05Z"
      generation: 1
      labels:
        type: default
      name: default-dingtalk-config
      resourceVersion: "51765176"
      uid: 3cbbedc4-7ed3-497d-bb22-b14c132625d8
    spec:
      dingtalk: {}
    status: {}
    [root@node118 kubekey]#
    [root@node118 kubekey]#
    [root@node118 kubekey]#
    [root@node118 kubekey]# kubectl get nm -oyaml
    apiVersion: v1
    items:
    - apiVersion: notification.kubesphere.io/v2beta2
      kind: NotificationManager
      metadata:
        annotations:
          kubectl.kubernetes.io/last-applied-configuration: |
            {"apiVersion":"notification.kubesphere.io/v2beta2","kind":"NotificationManager","metadata":{"annotations":{"meta.helm.sh/release-name":"notification-manager","meta.helm.sh/release-namespace":"kubesphere-monitoring-system"},"labels":{"app":"notification-manager","app.kubernetes.io/managed-by":"Helm"},"name":"notification-manager"},"spec":{"affinity":{},"batchMaxSize":100,"batchMaxWait":"1m","defaultConfigSelector":{"matchLabels":{"type":"default"}},"defaultSecretNamespace":"kubesphere-monitoring-federated","groupLabels":["alertname","namespace"],"image":"kubesphere/notification-manager:v2.3.0","imagePullPolicy":"IfNotPresent","nodeSelector":{},"portName":"webhook","receivers":{"globalReceiverSelector":{"matchLabels":{"type":"global"}},"options":{"email":{"deliveryType":"bulk","notificationTimeout":5},"slack":{"notificationTimeout":5},"wechat":{"notificationTimeout":5}},"tenantKey":"user","tenantReceiverSelector":{"matchLabels":{"type":"tenant"}}},"replicas":1,"resources":{"limits":{"cpu":"500m","memory":"500Mi"},"requests":{"cpu":"5m","memory":"20Mi"}},"routePolicy":"All","serviceAccountName":"notification-manager-sa","sidecars":{"tenant":{"image":"kubesphere/notification-tenant-sidecar:v3.2.0","name":"tenant","type":"kubesphere"}},"template":{"language":"English","languagePack":[{"name":"zh-cn","namespace":"kubesphere-monitoring-system"}],"reloadCycle":"1m","text":{"name":"notification-manager-template","namespace":"kubesphere-monitoring-system"}},"tolerations":[],"volumeMounts":[],"volumes":[]}}
          meta.helm.sh/release-name: notification-manager
          meta.helm.sh/release-namespace: kubesphere-monitoring-system
        creationTimestamp: "2023-08-08T08:55:01Z"
        generation: 1
        labels:
          app: notification-manager
          app.kubernetes.io/managed-by: Helm
        name: notification-manager
        resourceVersion: "51759287"
        uid: 97f2b102-b117-425a-97da-023441b390de
      spec:
        affinity: {}
        batchMaxSize: 100
        batchMaxWait: 1m
        defaultConfigSelector:
          matchLabels:
            type: default
        defaultSecretNamespace: kubesphere-monitoring-federated
        groupLabels:
        - alertname
        - namespace
        image: kubesphere/notification-manager:v2.3.0
        imagePullPolicy: IfNotPresent
        nodeSelector: {}
        portName: webhook
        receivers:
          globalReceiverSelector:
            matchLabels:
              type: global
          options:
            email:
              deliveryType: bulk
              notificationTimeout: 5
            slack:
              notificationTimeout: 5
            wechat:
              notificationTimeout: 5
          tenantKey: user
          tenantReceiverSelector:
            matchLabels:
              type: tenant
        replicas: 1
        resources:
          limits:
            cpu: 500m
            memory: 500Mi
          requests:
            cpu: 5m
            memory: 20Mi
        routePolicy: All
        serviceAccountName: notification-manager-sa
        sidecars:
          tenant:
            image: kubesphere/notification-tenant-sidecar:v3.2.0
            name: tenant
            type: kubesphere
        template:
          language: English
          languagePack:
          - name: zh-cn
            namespace: kubesphere-monitoring-system
          reloadCycle: 1m
          text:
            name: notification-manager-template
            namespace: kubesphere-monitoring-system
        tolerations: []
        volumeMounts: []
        volumes: []
    kind: List
    metadata:
      resourceVersion: ""
      selfLink: ""
    [root@node118 kubekey]#
    [root@node118 kubekey]#

    JustryDeng

    在页面上编辑后, 进行查看,还是引用的空

    [root@node118 kubekey]# kubectl get receivers global-dingtalk-receiver -o yaml
    apiVersion: notification.kubesphere.io/v2beta2
    kind: Receiver
    metadata:
      creationTimestamp: "2023-08-08T08:13:17Z"
      generation: 4
      labels:
        type: global
      name: global-dingtalk-receiver
      resourceVersion: "51768898"
      uid: 58cd94bd-fb75-464b-8496-037d63d5ebe4
    spec:
      dingtalk:
        chatbot:
          keywords:
          - alert
          - k8s
          - yy
          webhook:
            valueFrom:
              secretKeyRef:
                key: ""
                name: ""
        enabled: true
    status: {}
    [root@node118 kubekey]#

    我在想,是不是前端页面 和 后端notification-manager约定的字段有出入导致的部分数据丢失? 怎么检查一下相关组件的版本对应关系是否正确呢

    我看了,当前运行的容器,都是用的3.4.0的镜像拉起的

    其实我们怀疑,是我们升级到3.4.0时,遗留了不知道什么问题导致的。我们当时升级时,就遇到了一堆杂七杂八的问题,虽然后面磕磕绊绊解决了,但是总感觉不太踏实。或许可以从“怎么全方位检查一下3.4.0的升级是否有问题”这方面入手

    下班了,明天再继续分析下吧, 感谢支持

    @wanjunlei 你好,我发现在页面上编辑【钉钉机器人】点击确认时, 控制台有个not found报错提示

    说是找不到【default-dingtalk-config】,但是我用命令行查看配置,发现是有的

    怎么从这方面入手分析呢

    额,我又多试了几次,这个404报错并不稳定:有时候会报,有时候不会报

    kubectl delete nc default-dingtalk-config

    kubectl delete nr global-dingtalk-receiver

    kubectl delete secret -n kubesphere-monitoring-federated global-dingtalk-config-secret

    把钉钉的配置清理一下,然后重新设置

    好的,我这边试一下

    同时,我这边观察到一个现象,不知道和这个有没有关系(部分版本不一致:notification.kubesphere.io/v2beta2 和 notification.kubesphere.io/v2beta1 同时存在了)