KubeSphere 2.1.1
CentOS 7.5
NFS is configured; I have verified that the export can be mounted directly with mount -t nfs.
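
For reference, the manual NFS check was roughly the following (the server address and export path are placeholders, not the actual values from my setup):

mkdir -p /mnt/nfs-test
mount -t nfs <nfs-server>:/<export-path> /mnt/nfs-test      # placeholder server and export path
touch /mnt/nfs-test/write-test && rm -f /mnt/nfs-test/write-test   # confirm the export is writable
umount /mnt/nfs-test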
The errors are as follows:
redis cannot start:

kubesphere-system   redis-ha-haproxy-ffb8d889d-6k4m4                     0/1     Init:0/1                8          71m
kubesphere-system   redis-ha-haproxy-ffb8d889d-vgknw                     0/1     Init:0/1                8          71m
kubesphere-system   redis-ha-haproxy-ffb8d889d-vn84r                     0/1     Init:0/1                8          71m
kubesphere-system   redis-ha-server-0                                    0/2     Init:CrashLoopBackOff   17         71m


#kubectl describe po redis-ha-server-0 -n kubesphere-system
Events:
  Type     Reason            Age                   From               Message
  ----     ------            ----                  ----               -------
  Warning  FailedScheduling  <unknown>             default-scheduler  pod has unbound immediate PersistentVolumeClaims (repeated 24 times)
  Normal   Scheduled         <unknown>             default-scheduler  Successfully assigned kubesphere-system/redis-ha-server-0 to master3
  Normal   Pulled            3m13s (x5 over 6m1s)  kubelet, master3   Container image "redis:5.0.5-alpine" already present on machine
  Normal   Created           3m13s (x5 over 6m1s)  kubelet, master3   Created container config-init
  Normal   Started           3m12s (x5 over 6m)    kubelet, master3   Started container config-init
  Warning  BackOff           50s (x15 over 5m18s)  kubelet, master3   Back-off restarting failed container
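
Since it is the config-init init container that keeps failing, its logs are the natural next step; a possible check (pod and container names taken from the events above):

kubectl -n kubesphere-system logs redis-ha-server-0 -c config-init
kubectl -n kubesphere-system logs redis-ha-server-0 -c config-init --previous   # output of the previous failed attempt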


[root@master1 ~]# kubectl get pvc -A
NAMESPACE           NAME                      STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
kubesphere-system   data-redis-ha-server-0    Bound    pvc-e1d024d2-2b9c-40db-84d9-1c8591f854ce   2Gi        RWO            nfs-client     74m
kubesphere-system   minio                     Bound    pvc-40e8b69b-0970-480b-a0b2-283df7d500c8   20Gi       RWO            nfs-client     74m
kubesphere-system   openldap-pvc-openldap-0   Bound    pvc-2412c812-a1cc-458e-b348-7a11b46f26c1   2Gi        RWO            nfs-client     74m
kubesphere-system   openldap-pvc-openldap-1   Bound    pvc-3c75c866-1964-4017-89d6-868a72625542   2Gi        RWO            nfs-client     72m
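
The claims show Bound, so the remaining storage question is whether the NFS-backed volumes actually mount on the nodes; a quick check might look like this (the nfs-client-provisioner deployment name and namespace are assumptions based on the nfs-client StorageClass, adjust to your setup):

kubectl get pv                                              # bound volumes should list the NFS server and path
kubectl get storageclass nfs-client -o yaml                 # confirm which provisioner backs the StorageClass
kubectl logs -n kube-system deploy/nfs-client-provisioner   # assumed deployment name and namespace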

The PVCs are all there. Installer log (I have already run the commands it suggested):

TASK [common : Setting PersistentVolumeSize (etcd)] ****************************
skipping: [localhost]

TASK [common : Kubesphere | Check mysql PersistentVolumeClaim] *****************
fatal: [localhost]: FAILED! => {"changed": true, "cmd": "/usr/local/bin/kubectl get pvc -n kubesphere-system mysql-pvc -o jsonpath='{.status.capacity.storage}'\n", "delta": "0:00:00.461772", "end": "2020-06-07 07:43:53.655077", "msg": "non-zero return code", "rc": 1, "start": "2020-06-07 07:43:53.193305", "stderr": "Error from server (NotFound): persistentvolumeclaims \"mysql-pvc\" not found", "stderr_lines": ["Error from server (NotFound): persistentvolumeclaims \"mysql-pvc\" not found"], "stdout": "", "stdout_lines": []}
...ignoring

TASK [common : Kubesphere | Setting mysql db pv size] **************************
skipping: [localhost]

TASK [common : Kubesphere | Check redis PersistentVolumeClaim] *****************
fatal: [localhost]: FAILED! => {"changed": true, "cmd": "/usr/local/bin/kubectl get pvc -n kubesphere-system redis-pvc -o jsonpath='{.status.capacity.storage}'\n", "delta": "0:00:00.466937", "end": "2020-06-07 07:43:54.350043", "msg": "non-zero return code", "rc": 1, "start": "2020-06-07 07:43:53.883106", "stderr": "Error from server (NotFound): persistentvolumeclaims \"redis-pvc\" not found", "stderr_lines": ["Error from server (NotFound): persistentvolumeclaims \"redis-pvc\" not found"], "stdout": "", "stdout_lines": []}
...ignoring

TASK [common : Kubesphere | Setting redis db pv size] **************************
skipping: [localhost]

TASK [common : Kubesphere | Check minio PersistentVolumeClaim] *****************
fatal: [localhost]: FAILED! => {"changed": true, "cmd": "/usr/local/bin/kubectl get pvc -n kubesphere-system minio -o jsonpath='{.status.capacity.storage}'\n", "delta": "0:00:00.466085", "end": "2020-06-07 07:43:55.047883", "msg": "non-zero return code", "rc": 1, "start": "2020-06-07 07:43:54.581798", "stderr": "Error from server (NotFound): persistentvolumeclaims \"minio\" not found", "stderr_lines": ["Error from server (NotFound): persistentvolumeclaims \"minio\" not found"], "stdout": "", "stdout_lines":[]}
...ignoring

TASK [common : Kubesphere | Setting minio pv size] *****************************
skipping: [localhost]

TASK [common : Kubesphere | Check openldap PersistentVolumeClaim] **************
fatal: [localhost]: FAILED! => {"changed": true, "cmd": "/usr/local/bin/kubectl get pvc -n kubesphere-system openldap-pvc-openldap-0 -o jsonpath='{.status.capacity.storage}'\n", "delta": "0:00:00.464754", "end": "2020-06-07 07:43:55.745386", "msg": "non-zero return code", "rc": 1, "start": "2020-06-07 07:43:55.280632", "stderr": "Error from server (NotFound): persistentvolumeclaims \"openldap-pvc-openldap-0\" not found", "stderr_lines": ["Error from server (NotFound): persistentvolumeclaims \"openldap-pvc-openldap-0\" not found"], "stdout": "", "stdout_lines": []}
...ignoring

TASK [common : Kubesphere | Setting openldap pv size] **************************
skipping: [localhost]

TASK [common : Kubesphere | Check etcd db PersistentVolumeClaim] ***************
fatal: [localhost]: FAILED! => {"changed": true, "cmd": "/usr/local/bin/kubectl get pvc -n kubesphere-system etcd-pvc -o jsonpath='{.status.capacity.storage}'\n", "delta": "0:00:00.466743", "end": "2020-06-07 07:43:56.439605", "msg": "non-zero return code", "rc": 1, "start": "2020-06-07 07:43:55.972862", "stderr": "Error from server (NotFound): persistentvolumeclaims \"etcd-pvc\" not found", "stderr_lines": ["Error from server (NotFound): persistentvolumeclaims \"etcd-pvc\" not found"], "stdout": "", "stdout_lines": []}
...ignoring

TASK [common : Kubesphere | Setting etcd pv size] ******************************
skipping: [localhost]

TASK [common : Kubesphere | Check redis ha PersistentVolumeClaim] **************
fatal: [localhost]: FAILED! => {"changed": true, "cmd": "/usr/local/bin/kubectl get pvc -n kubesphere-system data-redis-ha-server-0 -o jsonpath='{.status.capacity.storage}'\n", "delta": "0:00:00.472975", "end": "2020-06-07 07:43:57.143155", "msg": "non-zero return code", "rc": 1, "start": "2020-06-07 07:43:56.670180", "stderr": "Error from server (NotFound): persistentvolumeclaims \"data-redis-ha-server-0\" not found", "stderr_lines": ["Error from server (NotFound): persistentvolumeclaims \"data-redis-ha-server-0\" not found"], "stdout": "", "stdout_lines": []}
...ignoring

TASK [common : Kubesphere | Setting redis ha pv size] **************************
skipping: [localhost]

TASK [common : Kubesphere | Creating common component manifests] ***************
changed: [localhost] => (item={u'path': u'etcd', u'file': u'etcd.yaml'})
changed: [localhost] => (item={u'name': u'mysql', u'file': u'mysql.yaml'})
changed: [localhost] => (item={u'path': u'redis', u'file': u'redis.yaml'})

TASK [common : Kubesphere | Deploy minio] **************************************
fatal: [localhost]: FAILED! => {"changed": true, "cmd": "/usr/local/bin/helm upgrade --install ks-minio /etc/kubesphere/minio-ha -f /etc/kubesphere/custom-values-minio.yaml --set fullnameOverride=minio --namespace kubesphere-system --wait --timeout 1800\n", "delta": "0:30:25.514513", "end": "2020-06-07 08:14:46.889418", "msg": "non-zero return code", "rc": 1, "start": "2020-06-07 07:44:21.374905", "stderr": "Error: timed out waiting for the condition", "stderr_lines": ["Error: timed out waiting for the condition"], "stdout": "Release \"ks-minio\" does not exist. Installing it now.", "stdout_lines": ["Release \"ks-minio\" does not exist. Installing it now."]}
...ignoring

TASK [common : debug] **********************************************************
ok: [localhost] => {
    "msg": [
        "1. check the storage configuration and storage server",
        "2. make sure the DNS address in /etc/resolv.conf is available.",
        "3. execute 'helm del --purge ks-minio && kubectl delete job -n kubesphere-system ks-minio-make-bucket-job'",
        "4. Restart the installer pod in kubesphere-system namespace"
    ]
}

TASK [common : fail] ***********************************************************
fatal: [localhost]: FAILED! => {"changed": false, "msg": "It is suggested to refer to the above methods for troubleshooting problems ."}
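
For what it's worth, the four suggestions above correspond to roughly these commands (the installer pod name is the one that appears later in this thread; yours will differ):

helm del --purge ks-minio
kubectl delete job -n kubesphere-system ks-minio-make-bucket-job        # in this thread the job was actually named minio-make-bucket-job
kubectl -n kubesphere-system delete pod ks-installer-79f84bd7c-m5q2f    # the ks-installer Deployment recreates the pod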

One more strange thing: no matter whether the network plugin is kube_network_plugin: calico or kube_network_plugin: flannel,
DNS cannot resolve anything:

kubectl exec -ti busybox -- nslookup kubernetes.default
Server:    10.233.0.10
Address 1: 10.233.0.10

nslookup: can't resolve 'kubernetes.default'
command terminated with exit code 1
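
Before blaming the network plugin, it is worth confirming that coredns itself is running and that its Service has endpoints, e.g.:

kubectl get pods -n kube-system -l k8s-app=kube-dns -o wide
kubectl logs -n kube-system -l k8s-app=kube-dns --tail=50
kubectl get svc -n kube-system coredns
kubectl get ep -n kube-system coredns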

@Feynman could one of the experts take a look when you have time? Or is there a way to bypass this redis component?

  • Jeff replied to this post

    Jeff This minio installation fails. I tried configuring a resolvable DNS for the installer, deleted the job, and reinstalled, but it still reports:

    / # /usr/local/bin/helm upgrade --install ks-minio /etc/kubesphere/minio-ha -f /etc/kubesphere/custom-values-minio.yaml --set fullnameOverride=minio --namespace kubesphere-system --debug
    [debug] Created tunnel using local port: '36822'
    
    [debug] SERVER: "127.0.0.1:36822"
    
    Release "ks-minio" does not exist. Installing it now.
    [debug] CHART PATH: /etc/kubesphere/minio-ha
    
    Error: timed out waiting for the condition

    @Jeff DNS is fixed now, but installing minio still times out, even though the corresponding svc exists. How should I debug this?

    [root@master1 kubesphere-all-v2.1.1]# kubectl get svc -A
    NAMESPACE           NAME                  TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)                  AGE
    default             kubernetes            ClusterIP   10.233.0.1      <none>        443/TCP                  9h
    kube-system         coredns               ClusterIP   10.233.0.3      <none>        53/UDP,53/TCP,9153/TCP   8h
    kube-system         tiller-deploy         ClusterIP   10.233.29.11    <none>        44134/TCP                8h
    kubesphere-system   minio                 ClusterIP   10.233.33.247   <none>        9000/TCP                 76m
    kubesphere-system   openldap              ClusterIP   None            <none>        389/TCP                  8h
    kubesphere-system   redis                 ClusterIP   10.233.33.128   <none>        6379/TCP                 8h
    kubesphere-system   redis-ha              ClusterIP   None            <none>        6379/TCP,26379/TCP       8h
    kubesphere-system   redis-ha-announce-0   ClusterIP   10.233.53.122   <none>        6379/TCP,26379/TCP       8h
    kubesphere-system   redis-ha-announce-1   ClusterIP   10.233.37.63    <none>        6379/TCP,26379/TCP       8h
    kubesphere-system   redis-ha-announce-2   ClusterIP   10.233.58.36    <none>        6379/TCP,26379/TCP       8h
    
    [root@master1 kubesphere-all-v2.1.1]# kubectl get ep -A
    NAMESPACE           NAME                      ENDPOINTS                                                         AGE
    default             kubernetes                172.19.186.76:6443,172.19.186.77:6443,172.19.186.80:6443          9h
    kube-system         coredns                   10.233.101.1:53,10.233.96.1:53,10.233.98.2:53 + 6 more...         9h
    kube-system         kube-controller-manager   <none>                                                            9h
    kube-system         kube-scheduler            <none>                                                            9h
    kube-system         openebs.io-local          <none>                                                            8h
    kube-system         tiller-deploy             10.233.99.1:44134                                                 9h
    kubesphere-system   minio                     10.233.103.1:9000,10.233.108.2:9000                               76m
    kubesphere-system   openldap                  10.233.101.4:389,10.233.98.5:389                                  8h
    kubesphere-system   redis                     10.233.101.7:6379,10.233.96.8:6379,10.233.98.3:6379               8h
    kubesphere-system   redis-ha                  10.233.101.6:6379,10.233.96.7:6379,10.233.98.7:6379 + 3 more...   8h
    kubesphere-system   redis-ha-announce-0       10.233.96.7:6379,10.233.96.7:26379                                8h
    kubesphere-system   redis-ha-announce-1       10.233.98.7:6379,10.233.98.7:26379                                8h
    kubesphere-system   redis-ha-announce-2       10.233.101.6:6379,10.233.101.6:26379                              8h
    [root@master1 kubesphere-all-v2.1.1]# kubectl -n kubesphere-system exec -it ks-installer-79f84bd7c-m5q2f sh
    / # nslookup kubernetes.default
    nslookup: can't resolve '(null)': Name does not resolve
    
    Name:      kubernetes.default
    Address 1: 10.233.0.1 kubernetes.default.svc.cluster.local
    [root@master1 ~]# helm del --purge ks-minio
    release "ks-minio" deleted
    [root@master1 ~]# kubectl delete job -n kubesphere-system minio-make-bucket-job
    job.batch "minio-make-bucket-job" deleted
    [root@master1 ~]# kubectl -n kubesphere-system exec -it ks-installer-79f84bd7c-m5q2f sh
    / # /usr/local/bin/helm upgrade --install ks-minio /etc/kubesphere/minio-ha -f /etc/kubesphere/custom-values-minio.yaml --set fullnameOverride=minio --namespace kubesphere-system
    Release "ks-minio" does not exist. Installing it now.
    Error: timed out waiting for the condition

    When helm deploys minio, it creates a make-bucket job; take a look at that job's logs. When minio times out, that job is usually the culprit.
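
    A possible way to inspect that job (in this thread it ended up being named minio-make-bucket-job, while the installer message calls it ks-minio-make-bucket-job):

    kubectl -n kubesphere-system get jobs
    kubectl -n kubesphere-system logs job/minio-make-bucket-job
    kubectl -n kubesphere-system describe pod -l job-name=minio-make-bucket-job   # if the job's pod is stuck rather than failing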

    21 days later

    zforward My workaround is below, for reference only; in my case the root cause was also DNS (a sketch of steps 1 and 2 follows this list):
    1. On every node, change clusterDNS in /var/lib/kubelet/config.yaml to the coredns cluster IP.
    2. Restart kubelet on all nodes.
    3. Make sure every pod can resolve names correctly.
    4. Restart the installer.
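
    A rough sketch of steps 1 and 2, assuming the coredns ClusterIP is 10.233.0.3 as shown further down in the thread:

    # /var/lib/kubelet/config.yaml on every node -- point clusterDNS at the coredns Service IP:
    # clusterDNS:
    # - 10.233.0.3
    systemctl restart kubelet
    # then verify resolution from any pod (step 3):
    kubectl exec -ti busybox -- nslookup kubernetes.default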

    • TAO replied to this post
      2 months later

      I ran into exactly the same problem as you.

      TAO kubectl get svc -n kube-system coredns
      NAME      TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)                  AGE
      coredns   ClusterIP   10.233.0.3   <none>        53/UDP,53/TCP,9153/TCP   269d

      This 10.233.0.3 is the coredns cluster IP.
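
      That IP can also be read straight from the Service, e.g.:

      kubectl get svc -n kube-system coredns -o jsonpath='{.spec.clusterIP}'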

      • TAO replied to this post

        Is the dns pod healthy? Run kubectl get pods --namespace=kube-system -l k8s-app=kube-dns