荒-于嬉 2025-06-17 16:52

Kubernetes with Ceph: PVC stuck in Pending state

I have set up Kubernetes with Ceph storage, but the PVC stays in the Pending state.

Reference documentation: https://docs.ceph.com/en/reef/rbd/rbd-kubernetes/

Kubernetes version: v1.32.3
Ceph version: 18.2.7

The Ceph cluster was deployed with cephadm on nodes 192.168.0.31, 192.168.0.32, and 192.168.0.33.
The Kubernetes cluster nodes are 192.168.0.101, 192.168.0.102, and 192.168.0.103.

The environment is a local VMware setup with no network restrictions, and no NetworkPolicy resources exist in the Kubernetes cluster.

PVC information:

root@knode1:~/git-project/tmp# kubectl get pvc 
NAME            STATUS    VOLUME   CAPACITY   ACCESS MODES   STORAGECLASS   VOLUMEATTRIBUTESCLASS   AGE
raw-block-pvc   Pending                                      csi-rbd-sc     <unset>                 20m
root@knode1:~/git-project/tmp# kubectl describe pvc raw-block-pvc 
Name:          raw-block-pvc
Namespace:     default
StorageClass:  csi-rbd-sc
Status:        Pending
Volume:        
Labels:        <none>
Annotations:   volume.beta.kubernetes.io/storage-provisioner: rbd.csi.ceph.com
               volume.kubernetes.io/storage-provisioner: rbd.csi.ceph.com
Finalizers:    [kubernetes.io/pvc-protection]
Capacity:      
Access Modes:  
VolumeMode:    Block
Used By:       <none>
Events:
  Type     Reason                Age                   From                                                                                             Message
  ----     ------                ----                  ----                                                                                             -------
  Warning  ProvisioningFailed    5m3s (x2 over 10m)    rbd.csi.ceph.com_csi-rbdplugin-provisioner-db44f6c49-gml52_0bf3a6b7-337f-4072-b8ee-dfa7d18eb0d5  failed to provision volume with StorageClass "csi-rbd-sc": rpc error: code = DeadlineExceeded desc = stream terminated by RST_STREAM with error code: CANCEL
  Normal   ExternalProvisioning  2m29s (x62 over 17m)  persistentvolume-controller                                                                      Waiting for a volume to be created either by the external provisioner 'rbd.csi.ceph.com' or manually by the system administrator. If volume creation is delayed, please verify that the provisioner is running and correctly registered.
  Normal   Provisioning          3s (x8 over 17m)      rbd.csi.ceph.com_csi-rbdplugin-provisioner-db44f6c49-gml52_0bf3a6b7-337f-4072-b8ee-dfa7d18eb0d5  External provisioner is provisioning volume for claim "default/raw-block-pvc"
  Warning  ProvisioningFailed    3s (x5 over 15m)      rbd.csi.ceph.com_csi-rbdplugin-provisioner-db44f6c49-gml52_0bf3a6b7-337f-4072-b8ee-dfa7d18eb0d5  failed to provision volume with StorageClass "csi-rbd-sc": rpc error: code = DeadlineExceeded desc = context deadline exceeded

ConfigMap, Secret, and StorageClass manifests:

root@knode1:~/git-project/tmp# cat csi-config-map.yaml 
---
apiVersion: v1
kind: ConfigMap
data:
  config.json: |-
    [
      {
        "clusterID": "6342545a-4a92-11f0-be56-000c29c48aad",
        "monitors": [
          "192.168.0.31:6789",
          "192.168.0.32:6789",
          "192.168.0.33:6789"
        ]
      }
    ]
metadata:
  name: ceph-csi-config
root@knode1:~/git-project/tmp# cat ceph-config-map.yaml 
---
apiVersion: v1
kind: ConfigMap
data:
  ceph.conf: |
    [global]
    auth_cluster_required = cephx
    auth_service_required = cephx
    auth_client_required = cephx
  # keyring is a required key and its value should be empty
  keyring: |
metadata:
  name: ceph-config
root@knode1:~/git-project/tmp# cat csi-rbd-secret.yaml  
---
apiVersion: v1
kind: Secret
metadata:
  name: csi-rbd-secret
  namespace: default
stringData:
  userID: kubernetes
  userKey: AQDVIVFoUd8IJBAA+D4cR7CL5ICxagwWMvCV6Q==
root@knode1:~/git-project/tmp# cat csi-rbd-sc.yaml 
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
   name: csi-rbd-sc
provisioner: rbd.csi.ceph.com
parameters:
   clusterID: 6342545a-4a92-11f0-be56-000c29c48aad
   pool: kubernetes
   imageFeatures: layering
   csi.storage.k8s.io/provisioner-secret-name: csi-rbd-secret
   csi.storage.k8s.io/provisioner-secret-namespace: default
   csi.storage.k8s.io/controller-expand-secret-name: csi-rbd-secret
   csi.storage.k8s.io/controller-expand-secret-namespace: default
   csi.storage.k8s.io/node-stage-secret-name: csi-rbd-secret
   csi.storage.k8s.io/node-stage-secret-namespace: default
reclaimPolicy: Delete
allowVolumeExpansion: true
mountOptions:
   - discard
root@knode1:~/git-project/tmp# cat csi-kms-config-map.yaml
---
apiVersion: v1
kind: ConfigMap
data:
  config.json: |-
    {}
metadata:
  name: ceph-csi-encryption-kms-config
root@knode1:~/git-project/tmp# 

Ceph cluster information:

root@ceph1:~# ceph -s
  cluster:
    id:     6342545a-4a92-11f0-be56-000c29c48aad
    health: HEALTH_OK
 
  services:
    mon: 3 daemons, quorum ceph1,ceph3,ceph2 (age 2h)
    mgr: ceph3.nryehs(active, since 2h), standbys: ceph1.chyiik
    osd: 6 osds: 6 up (since 2h), 6 in (since 2h)
 
  data:
    pools:   2 pools, 33 pgs
    objects: 11 objects, 449 KiB
    usage:   253 MiB used, 60 GiB / 60 GiB avail
    pgs:     33 active+clean
 
root@ceph1:~# ceph mon dump
epoch 3
fsid 6342545a-4a92-11f0-be56-000c29c48aad
last_changed 2025-06-16T10:14:11.100252+0000
created 2025-06-16T09:15:05.890378+0000
min_mon_release 18 (reef)
election_strategy: 1
0: [v2:192.168.0.31:3300/0,v1:192.168.0.31:6789/0] mon.ceph1
1: [v2:192.168.0.33:3300/0,v1:192.168.0.33:6789/0] mon.ceph3
2: [v2:192.168.0.32:3300/0,v1:192.168.0.32:6789/0] mon.ceph2
dumped monmap epoch 3
root@ceph1:~# ceph auth get client.kubernetes
[client.kubernetes]
    key = AQDVIVFoUd8IJBAA+D4cR7CL5ICxagwWMvCV6Q==
    caps mgr = "profile rbd pool=kubernetes"
    caps mon = "profile rbd"
    caps osd = "profile rbd pool=kubernetes"
root@ceph1:~# 

Any help would be greatly appreciated.


5 answers (4 not shown)

  • 宝码香车 2025-06-17 17:22

    This answer from 宝码香车 was written with reference to DeepSeek and organized for posting here.

    Troubleshooting a Kubernetes + Ceph PVC Stuck in Pending

    Based on the information you provided, the PVC stays Pending because the Ceph CSI plugin cannot create the volume. Detailed troubleshooting steps and solutions follow:

    Root-cause analysis

    The error log shows:

    failed to provision volume with StorageClass "csi-rbd-sc": rpc error: code = DeadlineExceeded desc = stream terminated by RST_STREAM with error code: CANCEL
    

    This means the CSI driver could not finish creating the volume before the RPC deadline, which is usually caused by a network connectivity problem between the provisioner and the Ceph cluster, or by incorrect permission configuration.
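
    A quick way to see whether this timeout is network-related is to test monitor reachability from inside the provisioner pod itself (the pod name is taken from the events above; the csi-rbdplugin container name follows the upstream ceph-csi deployment, and bash is assumed to be present in the image, so adjust if yours differs; add -n if the pod runs outside the default namespace):

    # Test TCP reachability to each mon from within the provisioner pod
    kubectl exec csi-rbdplugin-provisioner-db44f6c49-gml52 -c csi-rbdplugin -- \
      bash -c 'for m in 192.168.0.31 192.168.0.32 192.168.0.33; do
        (timeout 2 bash -c "</dev/tcp/$m/6789") && echo "$m OK" || echo "$m unreachable"
      done'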

    Troubleshooting steps

    1. Check connectivity to the Ceph cluster

    Make sure every Kubernetes node can reach the Ceph monitors (run the script below on each node; it tests both the v1 port 6789 and the v2 port 3300):

    # Run this on each Kubernetes node; it tests the local node's reachability
    # to every monitor (uses bash's /dev/tcp so telnet is not required)
    for mon in 31 32 33; do
      for port in 6789 3300; do
        echo -n "192.168.0.$mon:$port -> "
        (timeout 2 bash -c "</dev/tcp/192.168.0.$mon/$port") 2>/dev/null && echo "OK" || echo "Failed"
      done
    done
    

    2. Verify the Ceph cluster configuration

    Make sure the kubernetes pool exists and the client's permissions are correct:

    # Run these on a Ceph node
    ceph osd lspools | grep kubernetes
    ceph auth get client.kubernetes
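
    If the pool is missing, note that the referenced Ceph document also requires initializing it for RBD before use; a minimal sketch, using the pool name from the StorageClass above:

    # On a Ceph admin node: create the pool if needed, then initialize it for RBD
    ceph osd pool create kubernetes
    rbd pool init kubernetes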
    

    3. Check the CSI driver pod status

    # Adjust -n: the upstream ceph-csi example manifests deploy into "default"
    kubectl get pods -n kube-system | grep csi-rbd
    # The provisioner pod runs several containers; select one with -c
    kubectl logs -n kube-system <csi-rbdplugin-provisioner-pod-name> -c csi-provisioner
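
    It is also worth confirming that the driver itself is registered with the cluster; both queries below are standard kubectl:

    # The CSIDriver object is created by the ceph-csi deployment
    kubectl get csidriver rbd.csi.ceph.com
    # Expect one provisioner deployment plus one csi-rbdplugin pod per node
    kubectl get pods -A -o wide | grep csi-rbd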
    

    4. Verify the Secret

    Make sure the key in the Secret matches the one in the Ceph cluster:

    kubectl get secret csi-rbd-secret -o yaml
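
    To compare directly rather than eyeball base64 (run the second command on a Ceph node):

    # Decode the key stored in the Secret
    kubectl get secret csi-rbd-secret -o jsonpath='{.data.userKey}' | base64 -d; echo
    # Print the key Ceph holds for the client
    ceph auth get-key client.kubernetes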
    

    Solutions

    1. Update the StorageClass configuration

    Modify csi-rbd-sc.yaml: confirm the pool name, add a filesystem type, and optionally switch the mounter:

    apiVersion: storage.k8s.io/v1
    kind: StorageClass
    metadata:
      name: csi-rbd-sc
    provisioner: rbd.csi.ceph.com
    parameters:
      clusterID: 6342545a-4a92-11f0-be56-000c29c48aad
      pool: kubernetes
      imageFeatures: layering
      # mounter: rbd-nbd   # optional: use rbd-nbd instead of the default krbd mounter
      csi.storage.k8s.io/fstype: ext4
      csi.storage.k8s.io/provisioner-secret-name: csi-rbd-secret
      csi.storage.k8s.io/provisioner-secret-namespace: default
      csi.storage.k8s.io/controller-expand-secret-name: csi-rbd-secret
      csi.storage.k8s.io/controller-expand-secret-namespace: default
      csi.storage.k8s.io/node-stage-secret-name: csi-rbd-secret
      csi.storage.k8s.io/node-stage-secret-namespace: default
    reclaimPolicy: Delete
    allowVolumeExpansion: true
    mountOptions:
      - discard
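
    Note that a StorageClass's parameters are immutable, so apply the change by deleting and recreating the object, then recreate the PVC so provisioning retries (raw-block-pvc.yaml below stands in for whatever manifest originally created the claim):

    # StorageClass parameters cannot be patched in place
    kubectl delete sc csi-rbd-sc
    kubectl apply -f csi-rbd-sc.yaml
    # Recreate the PVC so it is provisioned with the updated class
    kubectl delete pvc raw-block-pvc
    kubectl apply -f raw-block-pvc.yaml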
    

    2. Check the Ceph client packages

    Make sure the ceph-common package is installed on every Kubernetes node (useful for the manual tests below; the CSI containers ship their own Ceph binaries):

    # Run on every Kubernetes node
    apt-get install -y ceph-common
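
    Because the default mounter is the kernel RBD client, it is also worth checking that the rbd kernel module loads on each node (a generic check, independent of ceph-csi):

    # Load and verify the kernel RBD client used by the default krbd mounter
    modprobe rbd
    lsmod | grep rbd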
    

    3. Verify access to the Ceph cluster

    Manually test Ceph access from a Kubernetes node:

    # Create temporary config files
    echo -e "[global]\nmon_host = 192.168.0.31,192.168.0.32,192.168.0.33" > /tmp/ceph.conf
    echo -e "[client.kubernetes]\nkey = AQDVIVFoUd8IJBAA+D4cR7CL5ICxagwWMvCV6Q==" > /tmp/ceph.client.kubernetes.keyring

    # Test access (--cluster expects a cluster name rather than an fsid, so it
    # is omitted; --conf already points at the right cluster)
    rbd ls --pool=kubernetes --id=kubernetes --keyring=/tmp/ceph.client.kubernetes.keyring --conf=/tmp/ceph.conf
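
    If listing works, a quick round-trip write confirms the caps are sufficient for provisioning (test-img is just a throwaway name for this check):

    # Create and delete a small throwaway image with the same credentials
    rbd create test-img --size 100M --pool=kubernetes --id=kubernetes --keyring=/tmp/ceph.client.kubernetes.keyring --conf=/tmp/ceph.conf
    rbd rm test-img --pool=kubernetes --id=kubernetes --keyring=/tmp/ceph.client.kubernetes.keyring --conf=/tmp/ceph.conf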
    

    4. Check firewall settings

    Make sure the following ports are open between the Kubernetes and Ceph nodes (a quick listener check follows the list):

    • Ceph monitors: 6789 (v1), 3300 (v2)
    • Ceph OSDs: 6800-7300
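
    A quick way to confirm the Ceph side is listening on those ports (standard iproute2 tooling, run on a Ceph node):

    # Mon ports 6789/3300 and the OSD range should all show listeners
    ss -tlnp | grep -E ':(6789|3300|6[89][0-9]{2}|7[0-2][0-9]{2}|7300)'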

    Troubleshooting flow

    [Flowchart not reproduced; it starts from "PVC status Pending".]

    Summary of common causes

    1. Network connectivity: make sure the Kubernetes nodes can reach all of the Ceph monitor nodes
    2. Insufficient permissions: confirm the client.kubernetes user has sufficient caps on the kubernetes pool
    3. Configuration mismatch: check that StorageClass parameters such as clusterID and pool are correct
    4. CSI driver problems: make sure the CSI driver pods are running and their logs show no errors

    If the steps above still do not resolve the issue, please provide the detailed logs from the CSI driver pods for further analysis.

    This answer was selected as the best answer by the asker.


Question events

  • Question closed by the system on June 25
  • Answer accepted on June 17
  • Question created on June 17