wanjunlei Here is the log from one of the pods:

{"error":{"root_cause":[{"type":"es_rejected_execution_exception","reason":"rejected execution of coordinating operation [coordinating_and_primary_bytes=300086987, replica_bytes=17952278, all_bytes=318039265, coordinating_operation_bytes=4340513, max_coordinating_and_primary_bytes=322122547]"}],"type":"es_rejected_execution_exception","reason":"rejected execution of coordinating operation [coordinating_and_primary_bytes=300086987, replica_bytes=17952278, all_bytes=318039265, coordinating_operation_bytes=4340513, max_coordinating_and_primary_bytes=322122547]"},"status":429}

[2023/01/13 10:54:17] [ warn] [engine] failed to flush chunk '17-1673607251.835139423.flb', retry in 11 seconds: task_id=1, input=systemd.2 > output=es.0 (out_id=0)

[2023/01/13 10:54:17] [error] [output:es:es.0] HTTP status=429 URI=/_bulk, response:

{"error":{"root_cause":[{"type":"es_rejected_execution_exception","reason":"rejected execution of coordinating operation [coordinating_and_primary_bytes=300086987, replica_bytes=17952278, all_bytes=318039265, coordinating_operation_bytes=4338731, max_coordinating_and_primary_bytes=322122547]"}],"type":"es_rejected_execution_exception","reason":"rejected execution of coordinating operation [coordinating_and_primary_bytes=300086987, replica_bytes=17952278, all_bytes=318039265, coordinating_operation_bytes=4338731, max_coordinating_and_primary_bytes=322122547]"},"status":429}

[2023/01/13 10:54:17] [ warn] [engine] failed to flush chunk '17-1673607251.664272896.flb', retry in 9 seconds: task_id=0, input=systemd.2 > output=es.0 (out_id=0)

[2023/01/13 10:54:17] [error] [output:es:es.0] HTTP status=429 URI=/_bulk, response:

{"error":{"root_cause":[{"type":"es_rejected_execution_exception","reason":"rejected execution of coordinating operation [coordinating_and_primary_bytes=300086987, replica_bytes=17952278, all_bytes=318039265, coordinating_operation_bytes=4341587, max_coordinating_and_primary_bytes=322122547]"}],"type":"es_rejected_execution_exception","reason":"rejected execution of coordinating operation [coordinating_and_primary_bytes=300086987, replica_bytes=17952278, all_bytes=318039265, coordinating_operation_bytes=4341587, max_coordinating_and_primary_bytes=322122547]"},"status":429}

[2023/01/13 10:54:17] [ warn] [engine] failed to flush chunk '17-1673607252.621261412.flb', retry in 8 seconds: task_id=5, input=systemd.2 > output=es.0 (out_id=0)

[2023/01/13 10:54:24] [error] [output:es:es.0] HTTP status=429 URI=/_bulk, response:

{"error":{"root_cause":[{"type":"es_rejected_execution_exception","reason":"rejected execution of coordinating operation [coordinating_and_primary_bytes=288825403, replica_bytes=34626716, all_bytes=323452119, coordinating_operation_bytes=4341587, max_coordinating_and_primary_bytes=322122547]"}],"type":"es_rejected_execution_exception","reason":"rejected execution of coordinating operation [coordinating_and_primary_bytes=288825403, replica_bytes=34626716, all_bytes=323452119, coordinating_operation_bytes=4341587, max_coordinating_and_primary_bytes=322122547]"},"status":429}

[2023/01/13 10:54:24] [ warn] [engine] chunk '17-1673607252.621261412.flb' cannot be retried: task_id=5, input=systemd.2 > output=es.0

[2023/01/13 10:54:25] [error] [output:es:es.0] HTTP status=429 URI=/_bulk, response:

{"error":{"root_cause":[{"type":"es_rejected_execution_exception","reason":"rejected execution of coordinating operation [coordinating_and_primary_bytes=287230797, replica_bytes=32292331, all_bytes=319523128, coordinating_operation_bytes=4338731, max_coordinating_and_primary_bytes=322122547]"}],"type":"es_rejected_execution_exception","reason":"rejected execution of coordinating operation [coordinating_and_primary_bytes=287230797, replica_bytes=32292331, all_bytes=319523128, coordinating_operation_bytes=4338731, max_coordinating_and_primary_bytes=322122547]"},"status":429}

[2023/01/13 10:54:25] [ warn] [engine] chunk '17-1673607251.664272896.flb' cannot be retried: task_id=0, input=systemd.2 > output=es.0

[2023/01/13 10:54:41] [ warn] [http_client] cannot increase buffer: current=512000 requested=544768 max=512000

[2023/01/13 10:54:41] [error] [output:es:es.0] could not pack/validate JSON response

{"took":19135,"errors":true,"items":[{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"9a53d7db-a11b-e900-1257-5308496b9fa4","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137969,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"e582829b-9b47-ed58-7e94-99b79b020d0c","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137970,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"20e6722c-6233-aa47-1746-c388bdad9510","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137971,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"fa5d8fce-4cd4-ce45-a226-b839a7e3f62e","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137972,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"3f62d996-1d20-b3df-006b-8575859ca7ea","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137973,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"dc03f40e-fb37-fbbc-cefe-8148a358887a","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137974,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"c47d7a1a-fd4a-a689-3897-db3fd75871d9","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137975,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"a1da67ae-eee3-a6d6-2e50-89e1f1b625f5","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137976,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"2ffe86c4-5949-5e81-1af5-1d24253e00a0","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137977,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"55afe9d4-ac7b-0884-1848-46521b0a2fc5","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137978,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"5557cbd9-31b8-990d-ca4c-fae691bf65aa","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137979,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"e8914c52-18af-c5e1-f53a-6e7f3ae1ac0c","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137980,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"ab492b4f-de3a-389a-29d1-24ee40e4c8cb","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137981,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"f3f61f6b-21ac-f551-1624-16ae91de66b5","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137982,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"a0d68c2f-b199-682e-d453-921ed0533b85","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137983,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"33ad7988-e765-c9e1-497b-1bec067da9c5","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":137984,"_primary_term":3,"status":201}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"ac59d8de-a64f-84cd-8979-f13a3149dbcd","_version":1,"result":"created

[2023/01/13 10:54:41] [ warn] [engine] failed to flush chunk '17-1673607252.30882341.flb', retry in 10 seconds: task_id=2, input=systemd.2 > output=es.0 (out_id=0)

[2023/01/13 10:54:42] [ warn] [http_client] cannot increase buffer: current=512000 requested=544768 max=512000

[2023/01/13 10:54:42] [ warn] [http_client] cannot increase buffer: current=512000 requested=544768 max=512000

[2023/01/13 10:54:44] [ warn] [http_client] cannot increase buffer: current=512000 requested=544768 max=512000

[2023/01/13 10:54:50] [ warn] [http_client] cannot increase buffer: current=512000 requested=544768 max=512000

[2023/01/13 10:55:26] [ warn] [http_client] cannot increase buffer: current=512000 requested=544768 max=512000

[2023/01/13 10:55:26] [error] [output:es:es.0] could not pack/validate JSON response

{"took":28284,"errors":true,"items":[{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"9a53d7db-a11b-e900-1257-5308496b9fa4","status":409,"error":{"type":"version_conflict_engine_exception","reason":"[9a53d7db-a11b-e900-1257-5308496b9fa4]: version conflict, document already exists (current version [1])","index_uuid":"Jp9hHFS_TdqnHzk4h1y_zw","shard":"0","index":"ks-logstash-log-2023.01.08"}}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"e582829b-9b47-ed58-7e94-99b79b020d0c","status":409,"error":{"type":"version_conflict_engine_exception","reason":"[e582829b-9b47-ed58-7e94-99b79b020d0c]: version conflict, document already exists (current version [1])","index_uuid":"Jp9hHFS_TdqnHzk4h1y_zw","shard":"0","index":"ks-logstash-log-2023.01.08"}}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"20e6722c-6233-aa47-1746-c388bdad9510","status":409,"error":{"type":"version_conflict_engine_exception","reason":"[20e6722c-6233-aa47-1746-c388bdad9510]: version conflict, document already exists (current version [1])","index_uuid":"Jp9hHFS_TdqnHzk4h1y_zw","shard":"0","index":"ks-logstash-log-2023.01.08"}}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"fa5d8fce-4cd4-ce45-a226-b839a7e3f62e","status":409,"error":{"type":"version_conflict_engine_exception","reason":"[fa5d8fce-4cd4-ce45-a226-b839a7e3f62e]: version conflict, document already exists (current version [1])","index_uuid":"Jp9hHFS_TdqnHzk4h1y_zw","shard":"0","index":"ks-logstash-log-2023.01.08"}}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"3f62d996-1d20-b3df-006b-8575859ca7ea","status":409,"error":{"type":"version_conflict_engine_exception","reason":"[3f62d996-1d20-b3df-006b-8575859ca7ea]: version conflict, document already exists (current version [1])","index_uuid":"Jp9hHFS_TdqnHzk4h1y_zw","shard":"0","index":"ks-logstash-log-2023.01.08"}}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"dc03f40e-fb37-fbbc-cefe-8148a358887a","status":409,"error":{"type":"version_conflict_engine_exception","reason":"[dc03f40e-fb37-fbbc-cefe-8148a358887a]: version conflict, document already exists (current version [1])","index_uuid":"Jp9hHFS_TdqnHzk4h1y_zw","shard":"0","index":"ks-logstash-log-2023.01.08"}}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"c47d7a1a-fd4a-a689-3897-db3fd75871d9","status":409,"error":{"type":"version_conflict_engine_exception","reason":"[c47d7a1a-fd4a-a689-3897-db3fd75871d9]: version conflict, document already exists (current version [1])","index_uuid":"Jp9hHFS_TdqnHzk4h1y_zw","shard":"0","index":"ks-logstash-log-2023.01.08"}}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"a1da67ae-eee3-a6d6-2e50-89e1f1b625f5","status":409,"error":{"type":"version_conflict_engine_exception","reason":"[a1da67ae-eee3-a6d6-2e50-89e1f1b625f5]: version conflict, document already exists (current version [1])","index_uuid":"Jp9hHFS_TdqnHzk4h1y_zw","shard":"0","index":"ks-logstash-log-2023.01.08"}}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"2ffe86c4-5949-5e81-1af5-1d24253e00a0","status":409,"error":{"type":"version_conflict_engine_exception","reason":"[2ffe86c4-5949-5e81-1af5-1d24253e00a0]: version conflict, document already exists (current version [1])","index_uuid":"Jp9hHFS_TdqnHzk4h1y_zw","shard":"0","index":"ks-logstash-log-2023.01.08"}}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"55afe9d4-ac7b-0884-1848-46521b0a2fc5","status":409,"error":{"type":"version_conflict_engine_exception","reason":"[55afe9d4-ac7b-0884-1848-46521b0a2fc5]: version conflict, document already exists (current version [1])","index_uuid":"Jp9hHFS_TdqnHzk4h1y_zw","shard":"0","index":"ks-logstash-log-2023.01.08"}}},{"create":{"_index":"ks-logstash-log-2023.01.08","_type":"doc","_id":"5557cbd9-31b8-990d-ca4c-fae691bf65aa","status":409,"error":{"type":"version_conflict_engine_exception"

[2023/01/13 10:55:26] [ warn] [engine] chunk '17-1673607252.30882341.flb' cannot be retried: task_id=2, input=systemd.2 > output=es.0

This is an Elasticsearch problem. First check why ES is returning 429.
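
For reference, a couple of read-only queries may help pinpoint the 429; the rejection message comes from Elasticsearch's indexing pressure limit, which defaults to roughly 10% of the JVM heap. This assumes ES 7.9 or later and that <es-host> is replaced with your ES address:

curl -s 'http://<es-host>:9200/_nodes/stats/indexing_pressure?pretty'
curl -s 'http://<es-host>:9200/_cat/thread_pool/write?v&h=node_name,active,queue,rejected'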

kubectl edit output -n kubesphere-logging-system es

Add:

spec:
  es:
    traceError: true

Then you can see the detailed error messages.

    kubectl get secret -n kubesphere-logging-system fluent-bit-config -oyaml

    Post the Fluent Bit config file so we can take a look.
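
    If it is easier, the rendered config can also be decoded straight from the secret (the data values are base64-encoded), for example:

    kubectl get secret -n kubesphere-logging-system fluent-bit-config -o jsonpath='{.data.fluent-bit\.conf}' | base64 -d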

      wanjunlei

      apiVersion: v1

      data:

      containerd.lua: ZnVuY3Rpb24gY29udGFpbmVyZCggdGFnLCB0aW1lc3RhbXAsIHJlY29yZCkKICAgICAgIGlmKHJlY29yZFsibG9ndGFnIl1+PW5pbCkKICAgICAgIHRoZW4KICAgICAgIHRpbWVTdHIgPSBvcy5kYXRlKCIhKnQiLCAgdGltZXN0YW1wWyJzZWMiXSkKICAgICAgICB0ID0gc3RyaW5nLmZvcm1hdCgiJTRkLSUwMmQtJTAyZFQlMDJkOiUwMmQ6JTAyZC4lc1oiLAogICAgICAgIHRpbWVTdHJbInllYXIiXSwgdGltZVN0clsibW9udGgiXSwgdGltZVN0clsiZGF5Il0sCiAgICAgICAgdGltZVN0clsiaG91ciJdLCB0aW1lU3RyWyJtaW4iXSwgdGltZVN0clsic2VjIl0sCiAgICAgICAgdGltZXN0YW1wWyJuc2VjIl0pOwogICAgICAgIHJlY29yZFsidGltZSJdID0gdDsKICAgICAgICByZWNvcmRbImxvZyJdID0gcmVjb3JkWyJtZXNzYWdlIl07CiAgICAgICAgcmVjb3JkWyJtZXNzYWdlIl0gPSAgbmlsOwogICAgICAgIHJldHVybiAxLCB0aW1lc3RhbXAsIHJlY29yZAogICAgICAgIGVsc2UKICAgICAgICByZXR1cm4gMCx0aW1lc3RhbXAscmVjb3JkCiAgICAgICBlbmQKZW5k

      fluent-bit.conf: W1NlcnZpY2VdCiAgICBQYXJzZXJzX0ZpbGUgICAgcGFyc2Vycy5jb25mCltJbnB1dF0KICAgIE5hbWUgICAgc3lzdGVtZAogICAgUGF0aCAgICAvdmFyL2xvZy9qb3VybmFsCiAgICBEQiAgICAvZmx1ZW50LWJpdC90YWlsL2RvY2tlci5kYgogICAgREIuU3luYyAgICBOb3JtYWwKICAgIFRhZyAgICBzZXJ2aWNlLmNyaW8KICAgIFN5c3RlbWRfRmlsdGVyICAgIF9TWVNURU1EX1VOSVQ9Y3Jpby5zZXJ2aWNlCltJbnB1dF0KICAgIE5hbWUgICAgc3lzdGVtZAogICAgUGF0aCAgICAvdmFyL2xvZy9qb3VybmFsCiAgICBEQiAgICAvZmx1ZW50LWJpdC90YWlsL2RvY2tlci5kYgogICAgREIuU3luYyAgICBOb3JtYWwKICAgIFRhZyAgICBzZXJ2aWNlLmRvY2tlcgogICAgU3lzdGVtZF9GaWx0ZXIgICAgX1NZU1RFTURfVU5JVD1kb2NrZXIuc2VydmljZQpbSW5wdXRdCiAgICBOYW1lICAgIHN5c3RlbWQKICAgIFBhdGggICAgL3Zhci9sb2cvam91cm5hbAogICAgREIgICAgL2ZsdWVudC1iaXQvdGFpbC9rdWJlbGV0LmRiCiAgICBEQi5TeW5jICAgIE5vcm1hbAogICAgVGFnICAgIHNlcnZpY2Uua3ViZWxldAogICAgU3lzdGVtZF9GaWx0ZXIgICAgX1NZU1RFTURfVU5JVD1rdWJlbGV0LnNlcnZpY2UKW0lucHV0XQogICAgTmFtZSAgICB0YWlsCiAgICBQYXRoICAgIC92YXIvbG9nL2NvbnRhaW5lcnMvKi5sb2cKICAgIEV4Y2x1ZGVfUGF0aCAgICAvdmFyL2xvZy9jb250YWluZXJzLypfa3ViZXNwaGVyZS1sb2dnaW5nLXN5c3RlbV9ldmVudHMtZXhwb3J0ZXIqLmxvZywvdmFyL2xvZy9jb250YWluZXJzL2t1YmUtYXVkaXRpbmctd2ViaG9vaypfa3ViZXNwaGVyZS1sb2dnaW5nLXN5c3RlbV9rdWJlLWF1ZGl0aW5nLXdlYmhvb2sqLmxvZwogICAgUmVmcmVzaF9JbnRlcnZhbCAgICAxMAogICAgU2tpcF9Mb25nX0xpbmVzICAgIHRydWUKICAgIERCICAgIC9mbHVlbnQtYml0L3RhaWwvcG9zLmRiCiAgICBEQi5TeW5jICAgIE5vcm1hbAogICAgTWVtX0J1Zl9MaW1pdCAgICA1TUIKICAgIFBhcnNlciAgICBjcmkKICAgIFRhZyAgICBrdWJlLioKW0lucHV0XQogICAgTmFtZSAgICB0YWlsCiAgICBQYXRoICAgIC92YXIvbG9nL2NvbnRhaW5lcnMva3ViZS1hdWRpdGluZy13ZWJob29rKl9rdWJlc3BoZXJlLWxvZ2dpbmctc3lzdGVtX2t1YmUtYXVkaXRpbmctd2ViaG9vayoubG9nCiAgICBSZWZyZXNoX0ludGVydmFsICAgIDEwCiAgICBTa2lwX0xvbmdfTGluZXMgICAgdHJ1ZQogICAgREIgICAgL2ZsdWVudC1iaXQvdGFpbC9wb3MtYXVkaXRpbmcuZGIKICAgIERCLlN5bmMgICAgTm9ybWFsCiAgICBNZW1fQnVmX0xpbWl0ICAgIDVNQgogICAgUGFyc2VyICAgIGNyaQogICAgVGFnICAgIGt1YmVfYXVkaXRpbmcKW0lucHV0XQogICAgTmFtZSAgICB0YWlsCiAgICBQYXRoICAgIC92YXIvbG9nL2NvbnRhaW5lcnMvKl9rdWJlc3BoZXJlLWxvZ2dpbmctc3lzdGVtX2V2ZW50cy1leHBvcnRlcioubG9nCiAgICBSZWZyZXNoX0ludGVydmFsICAgIDEwCiAgICBTa2lwX0xvbmdfTGluZXMgICAgdHJ1ZQogICAgREIgICAgL2ZsdWVudC1iaXQvdGFpbC9wb3MtZXZlbnRzLmRiCiAgICBEQi5TeW5jICAgIE5vcm1hbAogICAgTWVtX0J1Zl9MaW1pdCAgICA1TUIKICAgIFBhcnNlciAgICBjcmkKICAgIFRhZyAgICBrdWJlX2V2ZW50cwpbRmlsdGVyXQogICAgTmFtZSAgICBsdWEKICAgIE1hdGNoICAgIGt1YmUuKgogICAgc2NyaXB0ICAgIC9mbHVlbnQtYml0L2NvbmZpZy9jb250YWluZXJkLmx1YQogICAgY2FsbCAgICBjb250YWluZXJkCiAgICB0aW1lX2FzX3RhYmxlICAgIHRydWUKW0ZpbHRlcl0KICAgIE5hbWUgICAgcGFyc2VyCiAgICBNYXRjaCAgICBrdWJlX2F1ZGl0aW5nCiAgICBLZXlfTmFtZSAgICBtZXNzYWdlCiAgICBQYXJzZXIgICAganNvbgpbRmlsdGVyXQogICAgTmFtZSAgICBtb2RpZnkKICAgIE1hdGNoICAgIGt1YmVfYXVkaXRpbmcKICAgIENvbmRpdGlvbiAgICBLZXlfZG9lc19ub3RfZXhpc3QgICAgQXVkaXRJRCAgICAKICAgIEFkZCAgICBpZ25vcmUgICAgdHJ1ZQpbRmlsdGVyXQogICAgTmFtZSAgICBncmVwCiAgICBNYXRjaCAgICBrdWJlX2F1ZGl0aW5nCiAgICBFeGNsdWRlICAgIGlnbm9yZSB0cnVlCltGaWx0ZXJdCiAgICBOYW1lICAgIHBhcnNlcgogICAgTWF0Y2ggICAga3ViZV9ldmVudHMKICAgIEtleV9OYW1lICAgIG1lc3NhZ2UKICAgIFBhcnNlciAgICBqc29uCltGaWx0ZXJdCiAgICBOYW1lICAgIGt1YmVybmV0ZXMKICAgIE1hdGNoICAgIGt1YmUuKgogICAgS3ViZV9VUkwgICAgaHR0cHM6Ly9rdWJlcm5ldGVzLmRlZmF1bHQuc3ZjOjQ0MwogICAgS3ViZV9DQV9GaWxlICAgIC92YXIvcnVuL3NlY3JldHMva3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9jYS5jcnQKICAgIEt1YmVfVG9rZW5fRmlsZSAgICAvdmFyL3J1bi9zZWNyZXRzL2t1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvdG9rZW4KICAgIExhYmVscyAgICBmYWxzZQogICAgQW5ub3RhdGlvbnMgICAgZmFsc2UKW0ZpbHRlcl0KICAgIE5hbWUgICAgbmVzdAogICAgTWF0Y2ggICAga3ViZS4qCiAgICBPcGVyYXRpb24gICAgbGlmdAogICAgTmVzdGVkX3VuZGVyICAgIGt1YmVybmV0ZXMKICAgIEFkZF9wcmVmaXggICAga3ViZXJuZXRlc18KW0ZpbHRlcl0KICAgIE5hbWUgICAgbW9k
aWZ5CiAgICBNYXRjaCAgICBrdWJlLioKICAgIFJlbW92ZSAgICBzdHJlYW0KICAgIFJlbW92ZSAgICBrdWJlcm5ldGVzX3BvZF9pZAogICAgUmVtb3ZlICAgIGt1YmVybmV0ZXNfaG9zdAogICAgUmVtb3ZlICAgIGt1YmVybmV0ZXNfY29udGFpbmVyX2hhc2gKW0ZpbHRlcl0KICAgIE5hbWUgICAgbmVzdAogICAgTWF0Y2ggICAga3ViZS4qCiAgICBPcGVyYXRpb24gICAgbmVzdAogICAgV2lsZGNhcmQgICAga3ViZXJuZXRlc18qCiAgICBOZXN0X3VuZGVyICAgIGt1YmVybmV0ZXMKICAgIFJlbW92ZV9wcmVmaXggICAga3ViZXJuZXRlc18KW0ZpbHRlcl0KICAgIE5hbWUgICAgbHVhCiAgICBNYXRjaCAgICBzZXJ2aWNlLioKICAgIHNjcmlwdCAgICAvZmx1ZW50LWJpdC9jb25maWcvc3lzdGVtZC5sdWEKICAgIGNhbGwgICAgYWRkX3RpbWUKICAgIHRpbWVfYXNfdGFibGUgICAgdHJ1ZQpbT3V0cHV0XQogICAgTmFtZSAgICBlcwogICAgTWF0Y2hfUmVnZXggICAgKD86a3ViZXxzZXJ2aWNlKVwuKC4qKQogICAgSG9zdCAgICAxMC4zMy4xMC4xNwogICAgUG9ydCAgICA5MjAwCiAgICBMb2dzdGFzaF9Gb3JtYXQgICAgdHJ1ZQogICAgTG9nc3Rhc2hfUHJlZml4ICAgIGtzLWxvZ3N0YXNoLWxvZwogICAgVGltZV9LZXkgICAgQHRpbWVzdGFtcAogICAgR2VuZXJhdGVfSUQgICAgdHJ1ZQogICAgVHJhY2VfRXJyb3IgICAgdHJ1ZQpbT3V0cHV0XQogICAgTmFtZSAgICBlcwogICAgTWF0Y2ggICAga3ViZV9hdWRpdGluZwogICAgSG9zdCAgICAxMC4zMy4xMC4xNwogICAgUG9ydCAgICA5MjAwCiAgICBMb2dzdGFzaF9Gb3JtYXQgICAgdHJ1ZQogICAgTG9nc3Rhc2hfUHJlZml4ICAgIGtzLWxvZ3N0YXNoLWF1ZGl0aW5nCiAgICBHZW5lcmF0ZV9JRCAgICB0cnVlCltPdXRwdXRdCiAgICBOYW1lICAgIGVzCiAgICBNYXRjaCAgICBrdWJlX2V2ZW50cwogICAgSG9zdCAgICAxMC4zMy4xMC4xNwogICAgUG9ydCAgICA5MjAwCiAgICBMb2dzdGFzaF9Gb3JtYXQgICAgdHJ1ZQogICAgTG9nc3Rhc2hfUHJlZml4ICAgIGtzLWxvZ3N0YXNoLWV2ZW50cwogICAgR2VuZXJhdGVfSUQgICAgdHJ1ZQo=

      parsers.conf: ""

      systemd.lua: ZnVuY3Rpb24gYWRkX3RpbWUodGFnLCB0aW1lc3RhbXAsIHJlY29yZCkKICBuZXdfcmVjb3JkID0ge30KICB0aW1lU3RyID0gb3MuZGF0ZSgiISp0IiwgdGltZXN0YW1wWyJzZWMiXSkKICB0ID0gc3RyaW5nLmZvcm1hdCgiJTRkLSUwMmQtJTAyZFQlMDJkOiUwMmQ6JTAyZC4lc1oiLAoJCXRpbWVTdHJbInllYXIiXSwgdGltZVN0clsibW9udGgiXSwgdGltZVN0clsiZGF5Il0sCgkJdGltZVN0clsiaG91ciJdLCB0aW1lU3RyWyJtaW4iXSwgdGltZVN0clsic2VjIl0sCgkJdGltZXN0YW1wWyJuc2VjIl0pCiAga3ViZXJuZXRlcyA9IHt9CiAga3ViZXJuZXRlc1sicG9kX25hbWUiXSA9IHJlY29yZFsiX0hPU1ROQU1FIl0KICBrdWJlcm5ldGVzWyJjb250YWluZXJfbmFtZSJdID0gcmVjb3JkWyJTWVNMT0dfSURFTlRJRklFUiJdCiAga3ViZXJuZXRlc1sibmFtZXNwYWNlX25hbWUiXSA9ICJrdWJlLXN5c3RlbSIKICBuZXdfcmVjb3JkWyJ0aW1lIl0gPSB0CiAgbmV3X3JlY29yZFsibG9nIl0gPSByZWNvcmRbIk1FU1NBR0UiXQogIG5ld19yZWNvcmRbImt1YmVybmV0ZXMiXSA9IGt1YmVybmV0ZXMKICByZXR1cm4gMSwgdGltZXN0YW1wLCBuZXdfcmVjb3JkCmVuZA==

      kind: Secret
      metadata:
        creationTimestamp: "2022-05-10T12:11:27Z"
        managedFields:
        - apiVersion: v1
          fieldsType: FieldsV1
          fieldsV1:
            f:data:
              .: {}
              f:containerd.lua: {}
              f:fluent-bit.conf: {}
              f:parsers.conf: {}
              f:systemd.lua: {}
            f:metadata:
              f:ownerReferences:
                .: {}
                k:{"uid":"da908ca0-a481-4fc4-a561-204488c3db90"}:
                  .: {}
                  f:apiVersion: {}
                  f:blockOwnerDeletion: {}
                  f:controller: {}
                  f:kind: {}
                  f:name: {}
                  f:uid: {}
            f:type: {}
          manager: manager
          operation: Update
          time: "2023-01-09T07:55:45Z"
        name: fluent-bit-config
        namespace: kubesphere-logging-system
        ownerReferences:
        - apiVersion: logging.kubesphere.io/v1alpha2
          blockOwnerDeletion: true
          controller: true
          kind: FluentBitConfig
          name: fluent-bit-config
          uid: da908ca0-a481-4fc4-a561-204488c3db90
        resourceVersion: "144407134"
        selfLink: /api/v1/namespaces/kubesphere-logging-system/secrets/fluent-bit-config
        uid: 5edfd227-e937-40c4-a1ef-cb3788cff2f6
      type: Opaque

      containerd.lua:

      function containerd( tag, timestamp, record)
             if(record["logtag"]~=nil)
             then
             timeStr = os.date("!*t",  timestamp["sec"])
              t = string.format("%4d-%02d-%02dT%02d:%02d:%02d.%sZ",
              timeStr["year"], timeStr["month"], timeStr["day"],
              timeStr["hour"], timeStr["min"], timeStr["sec"],
              timestamp["nsec"]);
              record["time"] = t;
              record["log"] = record["message"];
              record["message"] =  nil;
              return 1, timestamp, record
              else
              return 0,timestamp,record
             end
      end

      fluent-bit.conf:

      [Service]
          Parsers_File    parsers.conf
      [Input]
          Name    systemd
          Path    /var/log/journal
          DB    /fluent-bit/tail/docker.db
          DB.Sync    Normal
          Tag    service.crio
          Systemd_Filter    _SYSTEMD_UNIT=crio.service
      [Input]
          Name    systemd
          Path    /var/log/journal
          DB    /fluent-bit/tail/docker.db
          DB.Sync    Normal
          Tag    service.docker
          Systemd_Filter    _SYSTEMD_UNIT=docker.service
      [Input]
          Name    systemd
          Path    /var/log/journal
          DB    /fluent-bit/tail/kubelet.db
          DB.Sync    Normal
          Tag    service.kubelet
          Systemd_Filter    _SYSTEMD_UNIT=kubelet.service
      [Input]
          Name    tail
          Path    /var/log/containers/*.log
          Exclude_Path    /var/log/containers/*_kubesphere-logging-system_events-exporter*.log,/var/log/containers/kube-auditing-webhook*_kubesphere-logging-system_kube-auditing-webhook*.log
          Refresh_Interval    10
          Skip_Long_Lines    true
          DB    /fluent-bit/tail/pos.db
          DB.Sync    Normal
          Mem_Buf_Limit    5MB
          Parser    cri
          Tag    kube.*
      [Input]
          Name    tail
          Path    /var/log/containers/kube-auditing-webhook*_kubesphere-logging-system_kube-auditing-webhook*.log
          Refresh_Interval    10
          Skip_Long_Lines    true
          DB    /fluent-bit/tail/pos-auditing.db
          DB.Sync    Normal
          Mem_Buf_Limit    5MB
          Parser    cri
          Tag    kube_auditing
      [Input]
          Name    tail
          Path    /var/log/containers/*_kubesphere-logging-system_events-exporter*.log
          Refresh_Interval    10
          Skip_Long_Lines    true
          DB    /fluent-bit/tail/pos-events.db
          DB.Sync    Normal
          Mem_Buf_Limit    5MB
          Parser    cri
          Tag    kube_events
      [Filter]
          Name    lua
          Match    kube.*
          script    /fluent-bit/config/containerd.lua
          call    containerd
          time_as_table    true
      [Filter]
          Name    parser
          Match    kube_auditing
          Key_Name    message
          Parser    json
      [Filter]
          Name    modify
          Match    kube_auditing
          Condition    Key_does_not_exist    AuditID    
          Add    ignore    true
      [Filter]
          Name    grep
          Match    kube_auditing
          Exclude    ignore true
      [Filter]
          Name    parser
          Match    kube_events
          Key_Name    message
          Parser    json
      [Filter]
          Name    kubernetes
          Match    kube.*
          Kube_URL    https://kubernetes.default.svc:443
          Kube_CA_File    /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          Kube_Token_File    /var/run/secrets/kubernetes.io/serviceaccount/token
          Labels    false
          Annotations    false
      [Filter]
          Name    nest
          Match    kube.*
          Operation    lift
          Nested_under    kubernetes
          Add_prefix    kubernetes_
      [Filter]
          Name    modify
          Match    kube.*
          Remove    stream
          Remove    kubernetes_pod_id
          Remove    kubernetes_host
          Remove    kubernetes_container_hash
      [Filter]
          Name    nest
          Match    kube.*
          Operation    nest
          Wildcard    kubernetes_*
          Nest_under    kubernetes
          Remove_prefix    kubernetes_
      [Filter]
          Name    lua
          Match    service.*
          script    /fluent-bit/config/systemd.lua
          call    add_time
          time_as_table    true
      [Output]
          Name    es
          Match_Regex    (?:kube|service)\.(.*)
          Host    10.33.10.17
          Port    9200
          Logstash_Format    true
          Logstash_Prefix    ks-logstash-log
          Time_Key    @timestamp
          Generate_ID    true
          Trace_Error    true
      [Output]
          Name    es
          Match    kube_auditing
          Host    10.33.10.17
          Port    9200
          Logstash_Format    true
          Logstash_Prefix    ks-logstash-auditing
          Generate_ID    true
      [Output]
          Name    es
          Match    kube_events
          Host    10.33.10.17
          Port    9200
          Logstash_Format    true
          Logstash_Prefix    ks-logstash-events
          Generate_ID    true

      systemd.lua:

      function add_time(tag, timestamp, record)
        new_record = {}
        timeStr = os.date("!*t", timestamp["sec"])
        t = string.format("%4d-%02d-%02dT%02d:%02d:%02d.%sZ",
      		timeStr["year"], timeStr["month"], timeStr["day"],
      		timeStr["hour"], timeStr["min"], timeStr["sec"],
      		timestamp["nsec"])
        kubernetes = {}
        kubernetes["pod_name"] = record["_HOSTNAME"]
        kubernetes["container_name"] = record["SYSLOG_IDENTIFIER"]
        kubernetes["namespace_name"] = "kube-system"
        new_record["time"] = t
        new_record["log"] = record["MESSAGE"]
        new_record["kubernetes"] = kubernetes
        return 1, timestamp, new_record
      end

      Judging from the Fluent Bit config, your runtime is containerd, but the version info you posted above is for Docker, so please confirm your runtime first. Do some nodes have both docker and containerd installed?

        We use TKE super node pools; I am not sure whether both runtimes are present on them.

        kubectl get node -ojson | jq '.items[0].status.nodeInfo.containerRuntimeVersion'

        Run this and check; run it for every node.
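
        Or list the runtime of every node in one go, for example:

        kubectl get nodes -o custom-columns='NAME:.metadata.name,RUNTIME:.status.nodeInfo.containerRuntimeVersion'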

          wanjunlei The eks:// ones are the three super node pools; all the other regular nodes use Docker.

          [root@rhino-c1-c1 ~]# kubectl get node -o jsonpath='{.items[0].status.nodeInfo.containerRuntimeVersion}'
          eks://2.0[root@rhino-c1-c1 ~]# 
          [root@rhino-c1-c1 ~]# kubectl get node -o jsonpath='{.items[1].status.nodeInfo.containerRuntimeVersion}'
          eks://2.0[root@rhino-c1-c1 ~]# 
          [root@rhino-c1-c1 ~]# kubectl get node -o jsonpath='{.items[2].status.nodeInfo.containerRuntimeVersion}'
          eks://2.0[root@rhino-c1-c1 ~]# 
          [root@rhino-c1-c1 ~]# kubectl get node -o jsonpath='{.items[3].status.nodeInfo.containerRuntimeVersion}'
          docker://19.3.9[root@rhino-c1-c1 ~]# 
          [root@rhino-c1-c1 ~]# kubectl get node -o jsonpath='{.items[4].status.nodeInfo.containerRuntimeVersion}'
          docker://19.3.9

          First, KubeSphere currently does not support different container runtimes on different nodes. Second, we have never integrated with this eks runtime, so I am not sure it can be supported.

            wanjunlei How can I change it to support the Docker runtime, so that at least the Docker nodes work properly first?

            Edit the cc (ClusterConfiguration):

            kubectl edit cc -n kubesphere-system ks-installer

            Delete the auditing, fluentbit, logging, and events sections under status, then schedule ks-installer onto a Docker node and wait for the installation to finish.
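
            If you prefer not to edit interactively, a rough sketch of the same status cleanup with kubectl patch (this assumes the ClusterConfiguration CRD does not define a status subresource, and each remove op fails if that key is already absent):

            kubectl -n kubesphere-system patch cc ks-installer --type=json -p='[{"op":"remove","path":"/status/auditing"},{"op":"remove","path":"/status/fluentbit"},{"op":"remove","path":"/status/logging"},{"op":"remove","path":"/status/events"}]'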

              First check the Fluent Bit config file and see whether it is the Docker one.

              Then check the fluentbit-operator init container; the correct command should be:

               echo CONTAINER_ROOT_DIR=$(docker info -f {{.DockerRootDir}}) > /fluentbit-operator/fluent-bit.env
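
              To see what the init container currently runs, something like this should work:

               kubectl -n kubesphere-logging-system get deploy fluentbit-operator -o jsonpath='{.spec.template.spec.initContainers[0].command}'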

              If both are correct, schedule fluentbit-operator onto a Docker node, then delete the fluent-bit daemonset and try again.
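
              For example, to pin the operator to a Docker node (a sketch; <docker-node> is a placeholder for one of your Docker node names):

               kubectl -n kubesphere-logging-system patch deploy fluentbit-operator -p '{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"<docker-node>"}}}}}'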

                wanjunlei

                The Fluent Bit config is still the containerd one:

                fluent-bit.conf:
                [Service]
                    Parsers_File    parsers.conf
                [Input]
                    Name    systemd
                    Path    /var/log/journal
                    DB    /fluent-bit/tail/docker.db
                    DB.Sync    Normal
                    Tag    service.crio
                    Systemd_Filter    _SYSTEMD_UNIT=crio.service
                [Input]
                    Name    systemd
                    Path    /var/log/journal
                    DB    /fluent-bit/tail/docker.db
                    DB.Sync    Normal
                    Tag    service.docker
                    Systemd_Filter    _SYSTEMD_UNIT=docker.service
                [Input]
                    Name    systemd
                    Path    /var/log/journal
                    DB    /fluent-bit/tail/kubelet.db
                    DB.Sync    Normal
                    Tag    service.kubelet
                    Systemd_Filter    _SYSTEMD_UNIT=kubelet.service
                [Input]
                    Name    tail
                    Path    /var/log/containers/*.log
                    Exclude_Path    /var/log/containers/*_kubesphere-logging-system_events-exporter*.log,/var/log/containers/kube-auditing-webhook*_kubesphere-logging-system_kube-auditing-webhook*.log
                    Refresh_Interval    10
                    Skip_Long_Lines    true
                    DB    /fluent-bit/tail/pos.db
                    DB.Sync    Normal
                    Mem_Buf_Limit    5MB
                    Parser    cri
                    Tag    kube.*
                [Input]
                    Name    tail
                    Path    /var/log/containers/kube-auditing-webhook*_kubesphere-logging-system_kube-auditing-webhook*.log
                    Refresh_Interval    10
                    Skip_Long_Lines    true
                    DB    /fluent-bit/tail/pos-auditing.db
                    DB.Sync    Normal
                    Mem_Buf_Limit    5MB
                    Parser    cri
                    Tag    kube_auditing
                [Input]
                    Name    tail
                    Path    /var/log/containers/*_kubesphere-logging-system_events-exporter*.log
                    Refresh_Interval    10
                    Skip_Long_Lines    true
                    DB    /fluent-bit/tail/pos-events.db
                    DB.Sync    Normal
                    Mem_Buf_Limit    5MB
                    Parser    cri
                    Tag    kube_events
                [Filter]
                    Name    lua
                    Match    kube.*
                    script    /fluent-bit/config/containerd.lua
                    call    containerd
                    time_as_table    true
                [Filter]
                    Name    parser
                    Match    kube_auditing
                    Key_Name    message
                    Parser    json
                [Filter]
                    Name    modify
                    Match    kube_auditing
                    Condition    Key_does_not_exist    AuditID    
                    Add    ignore    true
                [Filter]
                    Name    grep
                    Match    kube_auditing
                    Exclude    ignore true
                [Filter]
                    Name    parser
                    Match    kube_events
                    Key_Name    message
                    Parser    json
                [Filter]
                    Name    kubernetes
                    Match    kube.*
                    Kube_URL    https://kubernetes.default.svc:443
                    Kube_CA_File    /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
                    Kube_Token_File    /var/run/secrets/kubernetes.io/serviceaccount/token
                    Labels    false
                    Annotations    false
                [Filter]
                    Name    nest
                    Match    kube.*
                    Operation    lift
                    Nested_under    kubernetes
                    Add_prefix    kubernetes_
                [Filter]
                    Name    modify
                    Match    kube.*
                    Remove    stream
                    Remove    kubernetes_pod_id
                    Remove    kubernetes_host
                    Remove    kubernetes_container_hash
                [Filter]
                    Name    nest
                    Match    kube.*
                    Operation    nest
                    Wildcard    kubernetes_*
                    Nest_under    kubernetes
                    Remove_prefix    kubernetes_
                [Filter]
                    Name    lua
                    Match    service.*
                    script    /fluent-bit/config/systemd.lua
                    call    add_time
                    time_as_table    true
                [Output]
                    Name    es
                    Match_Regex    (?:kube|service)\.(.*)
                    Host    10.33.10.17
                    Port    9200
                    Logstash_Format    true
                    Logstash_Prefix    ks-logstash-log
                    Time_Key    @timestamp
                    Generate_ID    true
                    Trace_Error    true
                [Output]
                    Name    es
                    Match    kube_auditing
                    Host    10.33.10.17
                    Port    9200
                    Logstash_Format    true
                    Logstash_Prefix    ks-logstash-auditing
                    Generate_ID    true
                [Output]
                    Name    es
                    Match    kube_events
                    Host    10.33.10.17
                    Port    9200
                    Logstash_Format    true
                    Logstash_Prefix    ks-logstash-events
                    Generate_ID    true

                The fluentbit-operator init container command:

                echo CONTAINER_ROOT_DIR=/var/log > /fluentbit-operator/fluent-bit.env

                At this point it can only be fixed manually.

                1. Modify the Input resources

                kubectl edit input -n kubesphere-logging-system tail

                kubectl edit input -n kubesphere-logging-system tail-auditing

                kubectl edit input -n kubesphere-logging-system tail-events

                Change the parser to docker.
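
                After the edit, the tail spec should contain something like this (only the parser field changes; the field name follows the fluentbit-operator Input CRD):

                spec:
                  tail:
                    parser: docker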

                Delete the crio input:

                kubectl delete input -n kubesphere-logging-system crio

                Create a docker input:

                apiVersion: logging.kubesphere.io/v1alpha2
                kind: Input
                metadata:
                  name: docker
                  namespace: kubesphere-logging-system
                  labels:
                    logging.kubesphere.io/enabled: "true"
                    logging.kubesphere.io/component: logging
                spec:
                  systemd:
                    tag: service.docker
                    path: /var/log/journal
                    db: /fluent-bit/tail/docker.db
                    dbSync: Normal
                    systemdFilter:
                      - _SYSTEMD_UNIT=docker.service
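
                Save the manifest above (for example as docker-input.yaml) and apply it:

                kubectl apply -f docker-input.yaml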

                2. Delete the containerd filter

                kubectl delete filter -n kubesphere-logging-system containerd

                3. Modify fluentbit-operator: its volumes and initContainers need to change; you can follow this example:

                kind: Deployment
                apiVersion: apps/v1
                metadata:
                  name: fluentbit-operator
                  namespace: kubesphere-logging-system
                  labels:
                    app.kubernetes.io/component: operator
                    app.kubernetes.io/name: fluentbit-operator
                spec:
                  replicas: 1
                  selector:
                    matchLabels:
                      app.kubernetes.io/component: operator
                      app.kubernetes.io/name: fluentbit-operator
                  template:
                    metadata:
                      labels:
                        app.kubernetes.io/component: operator
                        app.kubernetes.io/name: fluentbit-operator
                    spec:
                      volumes:
                        - name: env
                          emptyDir: {}
                        - name: dockersock
                          hostPath:
                            path: /var/run/docker.sock
                            type: ''
                      initContainers:
                        - name: setenv
                          image: 'docker:19.03'
                          command:
                            - /bin/sh
                            - '-c'
                            - >-
                              set -ex; echo CONTAINER_ROOT_DIR=$(docker info -f
                              {{.DockerRootDir}}) > /fluentbit-operator/fluent-bit.env
                          resources: {}
                          volumeMounts:
                            - name: env
                              mountPath: /fluentbit-operator
                            - name: dockersock
                              readOnly: true
                              mountPath: /var/run/docker.sock
                          terminationMessagePath: /dev/termination-log
                          terminationMessagePolicy: File
                          imagePullPolicy: IfNotPresent

                After fluentbit-operator has finished restarting, delete the fluent-bit daemonset.
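
                For example (a sketch; this assumes the DaemonSet is named fluent-bit, and the operator should recreate it from the FluentBit custom resource):

                kubectl -n kubesphere-logging-system rollout status deploy/fluentbit-operator
                kubectl -n kubesphere-logging-system delete daemonset fluent-bit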