Hands-on Series - Integrating Ceph RBD and CephFS with a Kubernetes Cluster - Three Approaches

Published: 2023-11-13 13:55:15  Author: hukey

Preface

This post walks through deploying Kubernetes and Ceph in detail and shows how to use Ceph as external storage for Kubernetes.

Host information

Hostname     IP              Description
k8s-master   192.168.199.41  k8s master
k8s-node01   192.168.199.42  node
k8s-node02   192.168.199.43  node
ceph-node01  192.168.199.44  ceph-node01 - dedicated disk (/dev/sdb)
ceph-node02  192.168.199.45  ceph-node02 - dedicated disk (/dev/sdb)
ceph-node03  192.168.199.46  ceph-node03 - dedicated disk (/dev/sdb)

Software versions

Kubernetes: v1.19.7
Ceph: rpm-nautilus
OS: CentOS Linux release 7.9.2009 (Core)

Kubernetes Deployment

Hosts and versions

Host information

Hostname     IP              Role
k8s-master 192.168.199.41 k8s master
k8s-node01 192.168.199.42 node
k8s-node02 192.168.199.43 node

Software versions

Kubernetes: v1.19.7
OS: CentOS Linux release 7.9.2009 (Core)

Initialize the hosts

  1. Set the hostnames

Note: run on every host

#set the hostname (run the matching line on each host)
hostnamectl set-hostname --static k8s-master
hostnamectl set-hostname --static k8s-node01
hostnamectl set-hostname --static k8s-node02

#declare the hosts entries
cat << EOF >> /etc/hosts
192.168.199.41  k8s-master
192.168.199.42  k8s-node01
192.168.199.43  k8s-node02
EOF
  2. Configure the yum repositories

Note: run on every host

curl http://mirrors.aliyun.com/repo/Centos-7.repo -o /etc/yum.repos.d/Centos-7.repo
curl http://mirrors.aliyun.com/repo/epel-7.repo -o /etc/yum.repos.d/epel-7.repo

#add the docker-ce repo
curl http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo -o /etc/yum.repos.d/docker-ce.repo

#add the kubernetes repo
cat << EOF >> /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
gpgcheck=0
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
EOF

yum clean all && yum repolist all
  3. Disable SELinux and firewalld

Note: run on every host

sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
systemctl disable firewalld; systemctl stop firewalld

#reboot after making these changes
reboot
  4. Disable swap
swapoff -a
sed -ri 's/.*swap.*/#&/' /etc/fstab
  5. Configure time synchronization

Note: run on every host

yum install chrony ntpdate -y
sed -i "s/^server/#server/g" /etc/chrony.conf
echo 'server tiger.sina.com.cn iburst' >> /etc/chrony.conf
echo 'server ntp1.aliyun.com iburst' >> /etc/chrony.conf
systemctl enable chronyd ; systemctl start chronyd

Install docker-ce

  1. Install docker-ce

Note: run on every host

yum install -y docker-ce-23.0.6

#configure kernel parameters
cat << 'EOF' >> /etc/sysctl.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF

sysctl --system

cat << 'EOF' > /etc/docker/daemon.json
{
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m",
    "max-file": "3"
  },
  "exec-opts": ["native.cgroupdriver=systemd"],
  "storage-driver": "overlay2",
  "insecure-registries": [
    "192.168.1.200:80"
  ],
  "registry-mirrors": [
    "https://docker.mirrors.ustc.edu.cn",
    "https://hub-mirror.c.163.com"
  ]
}
EOF

systemctl enable docker ; systemctl start docker

Install the Kubernetes packages

Note: run on every host

yum install -y kubeadm-1.19.7 kubelet-1.19.7 kubectl-1.19.7

#enable kubelet at boot
systemctl enable kubelet

Configure Kubernetes-related parameters

Note: run on every host

#load the kernel modules required for kube-proxy IPVS mode
cat << EOF > /etc/sysconfig/modules/ipvs.modules
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
modprobe -- br_netfilter
EOF

chmod 755 /etc/sysconfig/modules/ipvs.modules && source /etc/sysconfig/modules/ipvs.modules

cat << EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
net.ipv4.tcp_tw_recycle=0
vm.swappiness=0
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.ipv6.conf.all.disable_ipv6=1
net.netfilter.nf_conntrack_max=2310720
EOF

sysctl --system

Enable kubeadm/kubectl shell completion

Note: run on every host

yum install -y bash-completion
kubeadm completion bash > /etc/bash_completion.d/kubeadm
kubectl completion bash > /etc/bash_completion.d/kubectl
source /etc/bash_completion.d/kubeadm /etc/bash_completion.d/kubectl

Generate the kubeadm init configuration

Note: run on the k8s-master node

root@k8s-master(192.168.199.41)~>kubeadm config print init-defaults > kube-init.yaml

#edit and extend the init file
root@k8s-master(192.168.199.41)~>cat kube-init.yaml
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 0s	#token lifetime; 0 means the token never expires
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.199.41	#make sure this is the k8s-master IP address!!!
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  name: k8s-master
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns:
  type: CoreDNS
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.aliyuncs.com/google_containers	#switched to a China-local mirror registry
kind: ClusterConfiguration
kubernetesVersion: v1.19.7
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16	#add the Pod network CIDR
---
#set IPVS as the default kube-proxy mode
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs

Pull the images

Note: run on the k8s-master node

root@k8s-master(192.168.199.41)~>kubeadm config images list --config kube-init.yaml
W0825 17:25:37.243941   16322 configset.go:348] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
registry.aliyuncs.com/google_containers/kube-apiserver:v1.19.7
registry.aliyuncs.com/google_containers/kube-controller-manager:v1.19.7
registry.aliyuncs.com/google_containers/kube-scheduler:v1.19.7
registry.aliyuncs.com/google_containers/kube-proxy:v1.19.7
registry.aliyuncs.com/google_containers/pause:3.2
registry.aliyuncs.com/google_containers/etcd:3.4.13-0
registry.aliyuncs.com/google_containers/coredns:1.7.0

root@k8s-master(192.168.199.41)~>kubeadm config images pull --config kube-init.yaml
W0825 17:25:43.478110   16351 configset.go:348] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
[config/images] Pulled registry.aliyuncs.com/google_containers/kube-apiserver:v1.19.7
[config/images] Pulled registry.aliyuncs.com/google_containers/kube-controller-manager:v1.19.7
[config/images] Pulled registry.aliyuncs.com/google_containers/kube-scheduler:v1.19.7
[config/images] Pulled registry.aliyuncs.com/google_containers/kube-proxy:v1.19.7
[config/images] Pulled registry.aliyuncs.com/google_containers/pause:3.2
[config/images] Pulled registry.aliyuncs.com/google_containers/etcd:3.4.13-0
[config/images] Pulled registry.aliyuncs.com/google_containers/coredns:1.7.0

Initialize the Kubernetes control plane

Note: run on the k8s-master node

root@k8s-master(192.168.199.41)~>kubeadm init --config=./kube-init.yaml | tee kube-init.log
...
Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.199.41:6443 --token abcdef.0123456789abcdef \
    --discovery-token-ca-cert-hash sha256:bb7a514b435210caad0f928e0a7143ce25915bcf5b0e687b75f77b9b4db126c1


root@k8s-master(192.168.199.41)~>  mkdir -p $HOME/.kube
root@k8s-master(192.168.199.41)~>  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
root@k8s-master(192.168.199.41)~>  sudo chown $(id -u):$(id -g) $HOME/.kube/config

root@k8s-master(192.168.199.41)~>kubectl get nodes
NAME         STATUS     ROLES    AGE   VERSION
k8s-master   NotReady   master   32s   v1.19.7

Join the worker nodes to the cluster

Note: run on all worker nodes

root@k8s-node01(192.168.199.42)~>kubeadm join 192.168.199.41:6443 --token abcdef.0123456789abcdef \
    --discovery-token-ca-cert-hash sha256:bb7a514b435210caad0f928e0a7143ce25915bcf5b0e687b75f77b9b4db126c1
    
root@k8s-node02(192.168.199.43)~>kubeadm join 192.168.199.41:6443 --token abcdef.0123456789abcdef \
    --discovery-token-ca-cert-hash sha256:bb7a514b435210caad0f928e0a7143ce25915bcf5b0e687b75f77b9b4db126c1
    
#check the nodes from the master
root@k8s-master(192.168.199.41)~>kubectl get nodes
NAME         STATUS     ROLES    AGE     VERSION
k8s-master   NotReady   master   11m     v1.19.7
k8s-node01   NotReady   <none>   9m28s   v1.19.7
k8s-node02   NotReady   <none>   7m28s   v1.19.7

Deploy the network plugin

Note: run on the k8s-master node

The network situation inside China makes pulling images painful, so download the packaged network plugin release instead: https://github.com/projectcalico/calico/releases/download/v3.17.2/release-v3.17.2.tgz
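
If the master has direct access to GitHub, the release tarball can simply be fetched there (a convenience step; the URL is the one given above):

wget https://github.com/projectcalico/calico/releases/download/v3.17.2/release-v3.17.2.tgz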

  1. Load the images
root@k8s-master(192.168.199.41)~>tar xf release-v3.17.2.tgz
root@k8s-master(192.168.199.41)~>cd release-v3.17.2/images/
root@k8s-master(192.168.199.41)~/release-v3.17.2/images>for i in `/usr/bin/ls *.tar`; do docker load -i $i ; done
  2. Apply the YAML manifest
root@k8s-master(192.168.199.41)~/release-v3.17.2/images>cd ../k8s-manifests/
root@k8s-master(192.168.199.41)~/release-v3.17.2/k8s-manifests>kubectl apply -f  calico.yaml
  3. Check Pod and node status

Wait a moment, then check the Pod and node status.

root@k8s-master(192.168.199.41)~>kubectl get po -n kube-system
NAME                                      READY   STATUS    RESTARTS   AGE
calico-kube-controllers-86bddfcff-sbzt8   1/1     Running   0          2d15h
calico-node-4jb6g                         1/1     Running   0          2d15h
calico-node-v4ktc                         1/1     Running   0          2d15h
calico-node-xrpcm                         1/1     Running   0          2d15h
coredns-6d56c8448f-2d9qt                  1/1     Running   0          2d15h
coredns-6d56c8448f-bhsjx                  1/1     Running   0          2d15h
etcd-k8s-master                           1/1     Running   1          2d16h
kube-apiserver-k8s-master                 1/1     Running   1          2d16h
kube-controller-manager-k8s-master        1/1     Running   1          2d16h
kube-proxy-hjwbf                          1/1     Running   1          2d16h
kube-proxy-lm85c                          1/1     Running   1          2d16h
kube-proxy-mmhwz                          1/1     Running   1          2d16h
kube-scheduler-k8s-master                 1/1     Running   1          2d16h

root@k8s-master(192.168.199.41)~>kubectl get nodes
NAME         STATUS   ROLES    AGE     VERSION
k8s-master   Ready    master   2d16h   v1.19.7
k8s-node01   Ready    <none>   2d16h   v1.19.7
k8s-node02   Ready    <none>   2d16h   v1.19.7

Ceph Deployment

Hosts and versions

Host information

Hostname     IP              Description
ceph-node01  192.168.199.44  ceph-node01 - dedicated disk (/dev/sdb)
ceph-node02  192.168.199.45  ceph-node02 - dedicated disk (/dev/sdb)
ceph-node03  192.168.199.46  ceph-node03 - dedicated disk (/dev/sdb)

Software versions

Ceph: rpm-nautilus
OS: CentOS Linux release 7.9.2009 (Core)

Initialize the hosts

  1. Set the hostnames

Note: run on every host

#set the hostname (run the matching line on each host)
hostnamectl set-hostname --static ceph-node01
hostnamectl set-hostname --static ceph-node02
hostnamectl set-hostname --static ceph-node03

#declare the hosts entries
cat << EOF >> /etc/hosts
192.168.199.44  ceph-node01
192.168.199.45  ceph-node02
192.168.199.46  ceph-node03
EOF
  2. Configure the yum repositories

Note: run on every host

curl http://mirrors.aliyun.com/repo/Centos-7.repo -o /etc/yum.repos.d/Centos-7.repo
curl http://mirrors.aliyun.com/repo/epel-7.repo -o /etc/yum.repos.d/epel-7.repo

#add the docker-ce repo
curl http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo -o /etc/yum.repos.d/docker-ce.repo

#add the ceph repo
cat << EOF >> /etc/yum.repos.d/ceph.repo
[ceph]
name=ceph
baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/x86_64/
enabled=1
gpgcheck=0
priority=1
 
[ceph-noarch]
name=cephnoarch
baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/noarch/
enabled=1
gpgcheck=0
priority=1
 
[ceph-source]
name=Ceph source packages
baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/SRPMS
enabled=1
gpgcheck=0
priority=1
EOF

yum clean all && yum repolist all
  3. Disable SELinux and firewalld

Note: run on every host

sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
systemctl disable firewalld; systemctl stop firewalld

#reboot after making these changes
reboot
  4. Configure time synchronization

Note: run on every host

yum install chrony ntpdate -y
sed -i "s/^server/#server/g" /etc/chrony.conf
echo 'server tiger.sina.com.cn iburst' >> /etc/chrony.conf
echo 'server ntp1.aliyun.com iburst' >> /etc/chrony.conf
systemctl enable chronyd ; systemctl start chronyd
  5. Set up passwordless SSH between the nodes

Note: run on every host

ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
ssh-copy-id ceph-node01
ssh-copy-id ceph-node02
ssh-copy-id ceph-node03

Install the Ceph packages

  1. Install the packages

Note: run on every host

yum install ceph ceph-radosgw -y
  2. Install the ceph-deploy tool

Note: run on ceph-node01

root@ceph-node01(192.168.199.44)~>yum install -y ceph-deploy

Initialize the cluster

Note: run on ceph-node01

root@ceph-node01(192.168.199.44)~>cd /etc/ceph/
root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy new ceph-node0{1..3} --cluster-network=192.168.199.0/24 --public-network=192.168.199.0/24

After the command completes, the following files are generated:

root@ceph-node01(192.168.199.44)/etc/ceph>ls
ceph.conf  ceph-deploy-ceph.log  ceph.mon.keyring  rbdmap

Check the generated configuration file

root@ceph-node01(192.168.199.44)/etc/ceph>cat ceph.conf
[global]
fsid = 3d3b333b-d461-4131-88fb-7257f58845e3
public_network = 192.168.199.0/24
cluster_network = 192.168.199.0/24
mon_initial_members = ceph-node01, ceph-node02, ceph-node03
mon_host = 192.168.199.44,192.168.199.45,192.168.199.46
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx

Initialize the MON nodes

Note: run on ceph-node01 in the /etc/ceph directory

root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy mon create-initial

#check the cluster status
root@ceph-node01(192.168.199.44)/etc/ceph>ceph -s
  cluster:
    id:     3d3b333b-d461-4131-88fb-7257f58845e3
    health: HEALTH_WARN
            mons are allowing insecure global_id reclaim

  services:
    mon: 3 daemons, quorum ceph-node01,ceph-node02,ceph-node03 (age 28s)	#MON nodes are in quorum
    mgr: no daemons active
    osd: 0 osds: 0 up, 0 in

  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   0 B used, 0 B / 0 B avail
    pgs:

Create the MGR

Note: run on ceph-node01 in the /etc/ceph directory

The MGR backs the Ceph dashboard; enabling it on a single node is enough.

root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy mgr create ceph-node01

#check the cluster status
root@ceph-node01(192.168.199.44)/etc/ceph>ceph -s
  cluster:
    id:     3d3b333b-d461-4131-88fb-7257f58845e3
    health: HEALTH_WARN
            mons are allowing insecure global_id reclaim

  services:
    mon: 3 daemons, quorum ceph-node01,ceph-node02,ceph-node03 (age 2m)
    mgr: ceph-node01(active, since 6s)	#the MGR was created successfully
    osd: 0 osds: 0 up, 0 in

  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   0 B used, 0 B / 0 B avail
    pgs:

Add the OSDs

Note: run on ceph-node01 in the /etc/ceph directory

#view the help
root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy disk --help
root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy osd --help

List the disks

root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy disk list ceph-node01
root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy disk list ceph-node02
root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy disk list ceph-node03

Zap (initialize) the disks

root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy disk zap ceph-node01 /dev/sdb
root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy disk zap ceph-node02 /dev/sdb
root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy disk zap ceph-node03 /dev/sdb

Create the OSDs

root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy osd create ceph-node01 --data /dev/sdb
root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy osd create ceph-node02 --data /dev/sdb
root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy osd create ceph-node03 --data /dev/sdb

Check the OSDs

root@ceph-node01(192.168.199.44)/etc/ceph>ceph osd tree
ID CLASS WEIGHT  TYPE NAME            STATUS REWEIGHT PRI-AFF
-1       0.02939 root default
-3       0.00980     host ceph-node01
 0   hdd 0.00980         osd.0            up  1.00000 1.00000
-5       0.00980     host ceph-node02
 1   hdd 0.00980         osd.1            up  1.00000 1.00000
-7       0.00980     host ceph-node03
 2   hdd 0.00980         osd.2            up  1.00000 1.00000

Check the cluster

root@ceph-node01(192.168.199.44)/etc/ceph>ceph -s
  cluster:
    id:     3d3b333b-d461-4131-88fb-7257f58845e3
    health: HEALTH_WARN
            mons are allowing insecure global_id reclaim

  services:
    mon: 3 daemons, quorum ceph-node01,ceph-node02,ceph-node03 (age 32m)
    mgr: ceph-node01(active, since 29m)
    osd: 3 osds: 3 up (since 2m), 3 in (since 2m)

  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   3.0 GiB used, 27 GiB / 30 GiB avail
    pgs:

Handle the cluster warnings

  1. Disable insecure global_id reclaim
root@ceph-node01(192.168.199.44)/etc/ceph>ceph -s
  cluster:
    id:     3d3b333b-d461-4131-88fb-7257f58845e3
    health: HEALTH_WARN
            mons are allowing insecure global_id reclaim
...


#fix
root@ceph-node01(192.168.199.44)/etc/ceph>ceph config set mon auth_allow_insecure_global_id_reclaim false

root@ceph-node01(192.168.199.44)/etc/ceph>ceph -s
  cluster:
    id:     3d3b333b-d461-4131-88fb-7257f58845e3
    health: HEALTH_OK
...
  2. Adjust the clock-skew thresholds
root@ceph-node01(192.168.199.44)/etc/ceph>ceph -s
  cluster:
    id:     3d3b333b-d461-4131-88fb-7257f58845e3
    health: HEALTH_WARN
            clock skew detected on mon.ceph-node02	#clock skew present


#fix
root@ceph-node01(192.168.199.44)/etc/ceph>cat << EOF >> /etc/ceph/ceph.conf
mon clock drift allowed = 2
mon clock drift warn backoff = 30
EOF

#then push the modified config to all nodes (the config was already pushed once, so this time the command overwrites the existing copy)
root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy --overwrite-conf admin ceph-node0{1..3}

#then restart the mon service on the node with the clock skew
systemctl restart ceph-mon.target
  3. Add the parameters that allow pool deletion

This prepares for deleting pools later on.

root@ceph-node01(192.168.199.44)/etc/ceph>echo mon_allow_pool_delete = true >> /etc/ceph/ceph.conf
root@ceph-node01(192.168.199.44)/etc/ceph>echo mon_max_pg_per_osd = 2000 >> /etc/ceph/ceph.conf
root@ceph-node01(192.168.199.44)/etc/ceph>ceph-deploy --overwrite-conf admin ceph-node0{1..3}

#restart the mon service on every node
systemctl restart ceph-mon.target

Enable the dashboard

Ceph ships with a native dashboard that provides visual monitoring of the Ceph storage cluster.

  1. Install ceph-mgr-dashboard

(On Nautilus) the ceph-mgr-dashboard package must be installed, and only on the MGR node.

root@ceph-node01(192.168.199.44)~>ceph mgr module enable dashboard
Error ENOENT: all mgr daemons do not support module 'dashboard', pass --force to force enablement

root@ceph-node01(192.168.199.44)/etc/ceph>ceph -s | egrep mgr
    mgr: ceph-node01(active, since 47m)

root@ceph-node01(192.168.199.44)/etc/ceph>yum install ceph-mgr-dashboard -y
root@ceph-node01(192.168.199.44)/etc/ceph>ceph mgr module enable dashboard
  2. Disable SSL
root@ceph-node01(192.168.199.44)/etc/ceph>ceph config set mgr mgr/dashboard/ssl false
  3. Set the listen address and port
root@ceph-node01(192.168.199.44)/etc/ceph>ceph config set mgr mgr/dashboard/server_addr 0.0.0.0
root@ceph-node01(192.168.199.44)/etc/ceph>ceph config set mgr mgr/dashboard/server_port 8080
root@ceph-node01(192.168.199.44)/etc/ceph>ceph mgr services
{
    "dashboard": "http://ceph-node01:8080/"
}
  4. Set the username and password
root@ceph-node01(192.168.199.44)/etc/ceph>echo 'admin123' > ceph-dashboard-passwd.txt
root@ceph-node01(192.168.199.44)/etc/ceph>ceph dashboard set-login-credentials admin -i ceph-dashboard-passwd.txt
  5. Log in from a browser

(Screenshot: Ceph Dashboard login page at http://ceph-node01:8080/)

Kubernetes Data Persistence Options

Providing data persistence for Pod resources in a Kubernetes cluster generally comes down to three approaches:

  • Volumes
    • Volume persistence comes in three common types: EmptyDir, HostPath, and NFS.
    • Volumes are the simplest persistence option in Kubernetes: no extra resources are needed; declare the volume directly in the Pod manifest and mount it.
  • PV, PVC
    • PVs and PVCs are a higher-level persistence option: the PV interfaces with the underlying storage system, the PVC claims a portion of the PV's resources, and the PVC is then mounted into the Pod.
  • StorageClass
    • An even more advanced option: the StorageClass exposes a pool of persistent storage, and when a Pod needs persistent data a PV is provisioned from the StorageClass automatically for the PVC and mounted into the Pod.

Kubernetes - Ceph Block Storage (RBD)

When an RBD block device backs Pod data persistence, only one node in the cluster can map that RBD device at a time. For example, once Node1 has mapped the RBD device, every Pod on Node1 can share the data on it, but no other node can map the same RBD device anymore.

An RBD device cannot be used across nodes: only Pods on the node that has the RBD mapped can use it; Pods on other nodes cannot.

A Ceph cluster enables CephX authentication by default, so any storage type that wants to use Ceph as its backend must authenticate with a Ceph user before it can work.

Integrating Kubernetes Volumes with RBD

Create the block device used by the Kubernetes volume

Note: run on ceph-node01

1. Create a pool
root@ceph-node01(192.168.199.44)~>ceph osd pool create k8s_pool 128 128
pool 'k8s_pool' created

2. Create a block device (image) in the pool
root@ceph-node01(192.168.199.44)~>rbd create --image-feature layering k8s_pool/k8s_volumes --size 10G

3. Inspect the block device
root@ceph-node01(192.168.199.44)~>rbd info k8s_pool/k8s_volumes
rbd image 'k8s_volumes':
        size 10 GiB in 2560 objects
        order 22 (4 MiB objects)
        snapshot_count: 0
        id: 5faf46ed210c
        block_name_prefix: rbd_data.5faf46ed210c
        format: 2
        features: layering
        op_features:
        flags:
        create_timestamp: Fri Sep 22 17:28:35 2023
        access_timestamp: Fri Sep 22 17:28:35 2023
        modify_timestamp: Fri Sep 22 17:28:35 2023

Create the auth user Kubernetes uses to access the block device

Note: run on ceph-node01

Kubernetes must go through an authenticated user to access an RBD block device in Ceph; without one, it cannot access the block devices in the cluster.

Command format: ceph auth get-or-create {username} mon '{mon caps}' osd '{osd caps}'

root@ceph-node01(192.168.199.44)~>ceph auth get-or-create client.kubernetes mon 'profile rbd' osd 'profile rbd pool=k8s_pool'
[client.kubernetes]
        key = AQAU7RBlLylaNxAAklK1DIOfsmyr+DaViDQDUw==

Explanation: this creates a Ceph auth user named client.kubernetes whose MON capability is the rbd profile and whose OSD capability is the rbd profile restricted to the k8s_pool pool.

View the created auth user

root@ceph-node01(192.168.199.44)~>ceph auth list
...
client.kubernetes
        key: AQAU7RBlLylaNxAAklK1DIOfsmyr+DaViDQDUw==
        caps: [mon] profile rbd
        caps: [osd] profile rbd pool=k8s_pool
...

Note that Ceph auth users all start with client., and each user carries its own mon and osd capabilities.

Store the authenticated user's key in a Kubernetes Secret

Base64-encode the user's key and store it in a Kubernetes Secret.

  1. Base64-encode the key
root@ceph-node01(192.168.199.44)~>ceph auth get-key client.kubernetes | base64
QVFBVTdSQmxMeWxhTnhBQWtsSzFESU9mc215citEYVZpRFFEVXc9PQ==
  2. Store the encoded key in a Secret
root@k8s-master(192.168.199.41)~>mkdir -pv manifests
mkdir: created directory ‘manifests’
root@k8s-master(192.168.199.41)~>cd manifests/
root@k8s-master(192.168.199.41)~/manifests>vim ceph-rbd-secret.yaml
apiVersion: v1
kind: Secret
metadata:
  name: ceph-rbd-secret
type: "kubernetes.io/rbd"
data:
  key: QVFBVTdSQmxMeWxhTnhBQWtsSzFESU9mc215citEYVZpRFFEVXc9PQ==
  3. Create the Secret resource

The RBD Secret must live in the same namespace as the Pod; if Pods in several namespaces need RBD storage, the Secret has to be created in each of those namespaces.

root@k8s-master(192.168.199.41)~/manifests>kubectl apply -f  ceph-rbd-secret.yaml
secret/ceph-rbd-secret created
root@k8s-master(192.168.199.41)~/manifests>kubectl get secrets
NAME                  TYPE                                  DATA   AGE
ceph-rbd-secret       kubernetes.io/rbd                     1      3s
default-token-zs2s2   kubernetes.io/service-account-token   3      4m57s
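
For example, to make the same Secret available to Pods in another namespace (the namespace name below is purely illustrative and assumed to exist already):

kubectl apply -n app-ns -f ceph-rbd-secret.yaml    #app-ns is an example namespace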

Install ceph-common on every Kubernetes node

ceph-common provides the Ceph command-line tools; it must be installed on every node, otherwise the node cannot run the commands needed to operate on Ceph.

root@k8s-master(192.168.199.41)~>yum install -y ceph-common
root@k8s-node01(192.168.199.42)~>yum install -y ceph-common
root@k8s-node02(192.168.199.43)~>yum install -y ceph-common

Create a Pod that uses the RBD block device for data persistence

  1. Write the manifest
root@k8s-master(192.168.199.41)~>cd manifests/

root@k8s-master(192.168.199.41)~/manifests>cat pod-ngx.yaml
apiVersion: v1
kind: Pod
metadata:
  labels:
    run: ngx
  name: ngx
spec:
  containers:
  - image: nginx:alpine
    name: ngx
    volumeMounts:
    - name: rbd-volume
      mountPath: /data
  dnsPolicy: ClusterFirst
  restartPolicy: Always
  volumes:
  - name: rbd-volume
    rbd:
      monitors:
      - 192.168.199.44:6789
      - 192.168.199.45:6789
      - 192.168.199.46:6789
      pool: k8s_pool
      image: k8s_volumes
      fsType: xfs
      user: kubernetes
      secretRef:
        name: ceph-rbd-secret
  2. Create the Pod
root@k8s-master(192.168.199.41)~/manifests>kubectl apply -f  pod-ngx.yaml
pod/ngx created

Check the created Pod
root@k8s-master(192.168.199.41)~/manifests>kubectl get po -o wide
NAME   READY   STATUS    RESTARTS   AGE     IP              NODE         NOMINATED NODE   READINESS GATES
ngx    1/1     Running   0          6m58s   10.244.58.196   k8s-node02   <none>           <none>



Check the creation events
root@k8s-master(192.168.199.41)~/manifests>kubectl describe po ngx
...
Events:
  Type    Reason                  Age    From                     Message
  ----    ------                  ----   ----                     -------
  Normal  Scheduled               7m15s  default-scheduler        Successfully assigned default/ngx to k8s-node02
  Normal  SuccessfulAttachVolume  7m15s  attachdetach-controller  AttachVolume.Attach succeeded for volume "rbd-volume"
  Normal  Pulled                  7m12s  kubelet                  Container image "nginx:alpine" already present on machine
  Normal  Created                 7m12s  kubelet                  Created container ngx
  Normal  Started                 7m12s  kubelet                  Started container ngx

Inspect the RBD mount used by the Pod

1. Inspect the RBD mount from inside the Pod

The Pod has successfully mounted the RBD block storage: just like a manual rbd map, the block device shows up as a raw disk named /dev/rbd0 and is mounted at /data.

root@k8s-master(192.168.199.41)~/manifests>kubectl exec -it ngx -- sh
/ # df -Th /data
Filesystem           Type            Size      Used Available Use% Mounted on
/dev/rbd0            xfs            10.0G     32.8M     10.0G   0% /data

2. Inspect the RBD mount on the host

The reason the Pod sees an RBD-mapped disk is that the host maps the block device and mounts the resulting disk into a path inside the container, which is why an RBD disk mount shows up inside the Pod.

First find which node the Pod is running on, then look at the RBD mount on that node.

root@k8s-node02(192.168.199.43)~>df -Th /dev/rbd0
Filesystem     Type  Size  Used Avail Use% Mounted on
/dev/rbd0      xfs    10G   33M   10G   1% /var/lib/kubelet/plugins/kubernetes.io/rbd/mounts/k8s_pool-image-k8s_volumes
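
To confirm the kernel-level mapping on the node itself (k8s-node02 here, the node hosting the Pod), a quick check could look like this:

lsblk /dev/rbd0        #the RBD image appears as a regular block device on the node
mount | grep rbd0      #and is mounted under the kubelet rbd plugin directory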

3. Observe the node's RBD mapping after the Pod is deleted

Once the Pod is deleted, the RBD device mapped on the node is removed automatically.

Delete the Pod on the master
root@k8s-master(192.168.199.41)~/manifests>kubectl delete -f pod-ngx.yaml
deployment.apps "ngx" deleted

Check rbd0 on the node again
root@k8s-node02(192.168.199.43)~>df -Th /dev/rbd0
df: ‘/dev/rbd0’: No such file or directory

4. What happens when multiple Pods use the same block device

A disk mapped from a block device can only be mounted on one node at a time. If the Pods land on different nodes, each of those nodes has to map and mount the same block device, which fails (a sketch of the second Pod's manifest follows).
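
The manifest for ngx-1 is not shown in the original; a minimal sketch, assuming it simply clones pod-ngx.yaml with a different name while pointing at the same RBD image:

cat <<EOF > pod-ngx-1.yaml
apiVersion: v1
kind: Pod
metadata:
  labels:
    run: ngx-1
  name: ngx-1
spec:
  containers:
  - image: nginx:alpine
    name: ngx-1
    volumeMounts:
    - name: rbd-volume
      mountPath: /data
  dnsPolicy: ClusterFirst
  restartPolicy: Always
  volumes:
  - name: rbd-volume
    rbd:
      monitors:
      - 192.168.199.44:6789
      - 192.168.199.45:6789
      - 192.168.199.46:6789
      pool: k8s_pool
      image: k8s_volumes
      fsType: xfs
      user: kubernetes
      secretRef:
        name: ceph-rbd-secret
EOF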

root@k8s-master(192.168.199.41)~/manifests>kubectl get po -o wide
NAME    READY   STATUS              RESTARTS   AGE   IP              NODE         NOMINATED NODE   READINESS GATES
ngx     1/1     Running             0          75s   10.244.58.196   k8s-node02   <none>           <none>
ngx-1   0/1     ContainerCreating   0          46s   <none>          k8s-node01   <none>           <none>

With the Pods spread across different nodes, the first one mounts successfully while the second sits in ContainerCreating for a long time. Check the detailed events:

root@k8s-master(192.168.199.41)~/manifests>kubectl describe po ngx-1
...
Events:
  Type     Reason                  Age               From                     Message
  ----     ------                  ----              ----                     -------
  Normal   Scheduled               112s              default-scheduler        Successfully assigned default/ngx-1 to k8s-node01
  Normal   SuccessfulAttachVolume  112s              attachdetach-controller  AttachVolume.Attach succeeded for volume "rbd-volume"
  Warning  FailedMount             9s (x2 over 60s)  kubelet                  MountVolume.WaitForAttach failed for volume "rbd-volume" : rbd image k8s_pool/k8s_volumes is still being used

If the two Pods are scheduled onto the same node, they can both use the same block device to persist their data, and both Pods start successfully.

root@k8s-master(192.168.199.41)~/manifests>kubectl get po -o wide
NAME    READY   STATUS    RESTARTS   AGE     IP              NODE         NOMINATED NODE   READINESS GATES
ngx     1/1     Running   0          5m25s   10.244.58.196   k8s-node02   <none>           <none>
ngx-1   1/1     Running   0          15s     10.244.58.197   k8s-node02   <none>           <none>

The effect is similar to shared storage:

Write data from Pod ngx
root@k8s-master(192.168.199.41)~/manifests>kubectl exec -it ngx -- sh
/ # hostname >> /data/host_name
/ # exit

Read the data back from Pod ngx-1
root@k8s-master(192.168.199.41)~/manifests>kubectl exec -it ngx-1 -- sh
/ # cat /data/host_name
ngx

Clean up the Kubernetes Pods and the Ceph block storage

Note: this cleans up what was demonstrated in this section, in preparation for the next one.

1. Clean up the Pods

root@k8s-master(192.168.199.41)~/manifests>kubectl get po -o wide
NAME    READY   STATUS    RESTARTS   AGE   IP              NODE         NOMINATED NODE   READINESS GATES
ngx     1/1     Running   0          19s   10.244.58.199   k8s-node02   <none>           <none>
ngx-1   1/1     Running   0          14s   10.244.58.200   k8s-node02   <none>           <none>
root@k8s-master(192.168.199.41)~/manifests>kubectl delete po ngx ngx-1
pod "ngx" deleted
pod "ngx-1" deleted

2. Clean up the Secret

root@k8s-master(192.168.199.41)~/manifests>kubectl get secrets
NAME                  TYPE                                  DATA   AGE
ceph-rbd-secret       kubernetes.io/rbd                     1      3h40m
default-token-zs2s2   kubernetes.io/service-account-token   3      3h45m
root@k8s-master(192.168.199.41)~/manifests>kubectl delete secrets ceph-rbd-secret
secret "ceph-rbd-secret" deleted

3. Clean up the Ceph RBD image and pool

1. List the RBD images
root@ceph-node01(192.168.199.44)~>rbd ls -l k8s_pool
NAME        SIZE   PARENT FMT PROT LOCK
k8s_volumes 10 GiB          2

2. Delete the RBD image
root@ceph-node01(192.168.199.44)~>rbd rm k8s_pool/k8s_volumes
Removing image: 100% complete...done.

3. Delete the pool
root@ceph-node01(192.168.199.44)/etc/ceph>ceph osd pool rm k8s_pool k8s_pool --yes-i-really-really-mean-it
pool 'k8s_pool' removed

4. Delete the auth user

root@ceph-node01(192.168.199.44)/etc/ceph>ceph auth rm client.kubernetes
updated

Integrating Kubernetes PVs with RBD

PVs and PVCs are heavily used in Kubernetes clusters; the integration works much like the Volume approach above.

Create the block storage used by the PV

1. Create a pool
root@ceph-node01(192.168.199.44)~>ceph osd pool create k8s_pool 128 128
pool 'k8s_pool' created

2. Create a block device (image) in the pool
root@ceph-node01(192.168.199.44)~>rbd create --image-feature layering k8s_pool/k8s_rbd --size 10G

3. List the block devices
root@ceph-node01(192.168.199.44)/etc/ceph>rbd ls -l k8s_pool
NAME    SIZE   PARENT FMT PROT LOCK
k8s_rbd 10 GiB          2

4. Show the RBD image details
root@ceph-node01(192.168.199.44)/etc/ceph>rbd info k8s_pool/k8s_rbd
rbd image 'k8s_rbd':
        size 10 GiB in 2560 objects
        order 22 (4 MiB objects)
        snapshot_count: 0
        id: 854dd1a4cd4b
        block_name_prefix: rbd_data.854dd1a4cd4b
        format: 2
        features: layering
        op_features:
        flags:
        create_timestamp: Mon Sep 25 14:14:56 2023
        access_timestamp: Mon Sep 25 14:14:56 2023
        modify_timestamp: Mon Sep 25 14:14:56 2023

Create the auth user Kubernetes uses to access the block device

root@ceph-node01(192.168.199.44)~>ceph auth get-or-create client.kubernetes mon 'profile rbd' osd 'profile rbd pool=k8s_pool'
[client.kubernetes]
        key = AQCWJRFlgyw/IhAAAORQ8XYGkzfQqPH13UxhRQ==

Explanation: this creates a Ceph auth user named client.kubernetes whose MON capability is the rbd profile and whose OSD capability is the rbd profile restricted to the k8s_pool pool.

View the created auth user.

root@ceph-node01(192.168.199.44)/etc/ceph>ceph auth get  client.kubernetes
[client.kubernetes]
        key = AQCWJRFlgyw/IhAAAORQ8XYGkzfQqPH13UxhRQ==
        caps mon = "profile rbd"
        caps osd = "profile rbd pool=k8s_pool"
exported keyring for client.kubernetes

Store the authenticated user's key in a Kubernetes Secret

Base64-encode the user's key and store it in a Kubernetes Secret.

1. Base64-encode the key

root@ceph-node01(192.168.199.44)~>ceph auth get-key client.kubernetes | base64
QVFDV0pSRmxneXcvSWhBQUFPUlE4WFlHa3pmUXFQSDEzVXhoUlE9PQ==

2. Store the encoded key in a Secret resource

root@k8s-master(192.168.199.41)~>mkdir -pv manifests/rbd-pv
mkdir: created directory ‘manifests/rbd-pv’
root@k8s-master(192.168.199.41)~>cd manifests/rbd-pv

root@k8s-master(192.168.199.41)~/manifests/rbd-pv>cat ceph-rbd-secret.yaml
apiVersion: v1
kind: Secret
metadata:
  name: ceph-rbd-secret
type: "kubernetes.io/rbd"
data:
  key: QVFDV0pSRmxneXcvSWhBQUFPUlE4WFlHa3pmUXFQSDEzVXhoUlE9PQ==

3. Create the Secret resource

The RBD Secret must live in the same namespace as the Pod; if Pods in several namespaces need RBD storage, the Secret has to be created in each of those namespaces.

root@k8s-master(192.168.199.41)~/manifests/rbd-pv>kubectl apply -f  ceph-rbd-secret.yaml
secret/ceph-rbd-secret configured

root@k8s-master(192.168.199.41)~/manifests/rbd-pv>kubectl get secrets
NAME                  TYPE                                  DATA   AGE
ceph-rbd-secret       kubernetes.io/rbd                     1      63m
default-token-zs2s2   kubernetes.io/service-account-token   3      68m

Install ceph-common on every Kubernetes node

ceph-common provides the Ceph command-line tools; it must be installed on every node, otherwise the node cannot run the commands needed to operate on Ceph.

root@k8s-master(192.168.199.41)~>yum install -y ceph-common
root@k8s-node01(192.168.199.42)~>yum install -y ceph-common
root@k8s-node02(192.168.199.43)~>yum install -y ceph-common

Create the PV and PVC resources backed by RBD

Create the PV and PVC storage resources in the cluster; most of the configuration lives on the PV, whose backend is the Ceph cluster's RBD block device.

1. Write the manifest

root@k8s-master(192.168.199.41)~/manifests/rbd-pv>cat ceph-rbd-pv-pvc.yaml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: ceph-rbd-pv
  namespace: default
spec:
  capacity:
    storage: 1Gi
  accessModes:
  - ReadWriteMany
  rbd:
    monitors:
    - 192.168.199.44:6789
    - 192.168.199.45:6789
    - 192.168.199.46:6789
    pool: k8s_pool
    image: k8s_rbd
    user: kubernetes
    fsType: xfs
    secretRef:
      name: ceph-rbd-secret
  persistentVolumeReclaimPolicy: Recycle
  storageClassName: rbd
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ceph-rbd-pvc
  namespace: default
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
  storageClassName: rbd

2. Create the PV and PVC in the cluster

1. Create the resources
root@k8s-master(192.168.199.41)~/manifests/rbd-pv>kubectl apply -f  ceph-rbd-pv-pvc.yaml
persistentvolume/ceph-rbd-pv created
persistentvolumeclaim/ceph-rbd-pvc created

2. Check the created resources
root@k8s-master(192.168.199.41)~/manifests/rbd-pv>kubectl get pv
NAME          CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                  STORAGECLASS   REASON   AGE
ceph-rbd-pv   1Gi        RWX            Recycle          Bound    default/ceph-rbd-pvc   rbd                     3m50s
root@k8s-master(192.168.199.41)~/manifests/rbd-pv>kubectl get pvc
NAME           STATUS   VOLUME        CAPACITY   ACCESS MODES   STORAGECLASS   AGE
ceph-rbd-pvc   Bound    ceph-rbd-pv   1Gi        RWX            rbd            107s

Create a Pod that mounts the PV and write data into it

1. Write the Pod manifest

root@k8s-master(192.168.199.41)~/manifests/rbd-pv>cat ceph-rbd-pv-pod.yaml
apiVersion: v1
kind: Pod
metadata:
  labels:
    run: ceph-rbd-pv-pod
  name: ceph-rbd-pv-pod
spec:
  containers:
  - image: nginx:alpine
    name: ceph-rbd-pv-pod
    volumeMounts:
    - name: rbd-pvc
      mountPath: /data
  dnsPolicy: ClusterFirst
  restartPolicy: Always
  volumes:
  - name: rbd-pvc
    persistentVolumeClaim:
      claimName: ceph-rbd-pvc

2. Create the Pod in the cluster

root@k8s-master(192.168.199.41)~/manifests/rbd-pv>kubectl apply -f  ceph-rbd-pv-pod.yaml
pod/ceph-rbd-pv-pod created

3. Write data from inside the Pod

root@k8s-master(192.168.199.41)~/manifests/rbd-pv>kubectl exec -it ceph-rbd-pv-pod -- sh
/ # df -Th /data
Filesystem           Type            Size      Used Available Use% Mounted on
/dev/rbd0            xfs            10.0G     32.8M     10.0G   0% /data

/ # echo 'hello rbd.' > /data/hello.txt
/ # cat /data/hello.txt
hello rbd.

Every volume type uses RBD the same way: the host running the Pod maps the RBD device and mounts the resulting disk, which is then mounted into the Pod; it cannot span hosts.

Clean up the Kubernetes Pods and the Ceph block storage

Note: this cleans up what was demonstrated in this section, in preparation for the next one.

1. Clean up the Pods

root@k8s-master(192.168.199.41)~/manifests/rbd-pv>kubectl delete -f ceph-rbd-pv-pod.yaml
pod "ceph-rbd-pv-pod" deleted

2. Clean up the PVC and PV

1. Delete the PVC
root@k8s-master(192.168.199.41)~/manifests/rbd-pv>kubectl delete pvc ceph-rbd-pvc
persistentvolumeclaim "ceph-rbd-pvc" deleted

2. Delete the PV
root@k8s-master(192.168.199.41)~/manifests/rbd-pv>kubectl delete pv ceph-rbd-pv
persistentvolume "ceph-rbd-pv" deleted

3. Clean up the Ceph RBD image and pool

1. List the RBD images
root@ceph-node01(192.168.199.44)~>rbd ls -l k8s_pool
NAME    SIZE   PARENT FMT PROT LOCK
k8s_rbd 10 GiB          2

2. Delete the RBD image
root@ceph-node01(192.168.199.44)~>rbd rm k8s_pool/k8s_rbd
Removing image: 100% complete...done.

3. Delete the pool
root@ceph-node01(192.168.199.44)/etc/ceph>ceph osd pool rm k8s_pool k8s_pool --yes-i-really-really-mean-it
pool 'k8s_pool' removed

4. Delete the auth user

root@ceph-node01(192.168.199.44)/etc/ceph>ceph auth rm client.kubernetes
updated

Integrating Kubernetes StorageClass with RBD

Architecture of the RBD + StorageClass integration

(Diagram: RBD block storage integrated with a StorageClass through ceph-csi)

Initialize the Ceph storage pool

1. Create a pool named kubernetes
root@ceph-node01(192.168.199.44)~>ceph osd pool create kubernetes 128 128
pool 'kubernetes' created

2. Initialize the pool (this step must not be skipped)
root@ceph-node01(192.168.199.44)~>rbd pool init kubernetes

Create the auth user Kubernetes uses to access the block devices

root@ceph-node01(192.168.199.44)~>ceph auth get-or-create client.kubernetes mon 'profile rbd' osd 'profile rbd pool=kubernetes'
[client.kubernetes]
        key = AQAFOxFlk67IBhAAVHoSfvgdUAGHd/f2mGFurw==

Deploy ceph-csi

ceph-csi repository: https://github.com/ceph/ceph-csi/tree/devel/deploy

A StorageClass lets PVs be created automatically, on demand, through a provisioner client.

When the StorageClass uses Ceph as its backend storage, the client tool that provisions PVs is the CSI driver, so the ceph-csi components must first be deployed in the Kubernetes cluster; the driver inside those CSI Pods is what connects to the Ceph cluster.

Note: pay close attention to versions when deploying ceph-csi; the ceph-csi version has to match the Ceph and Kubernetes versions in use.

The versions used here are:

ceph: nautilus
k8s: v1.19.7
ceph-csi: 3.4.0

ceph-csi download link: https://github.com/ceph/ceph-csi/archive/refs/tags/v3.4.0.tar.gz
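
Assuming the master can reach GitHub, the tarball can be fetched under the filename used below:

wget -O ceph-csi-3.4.0.tar.gz https://github.com/ceph/ceph-csi/archive/refs/tags/v3.4.0.tar.gz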

root@k8s-master(192.168.199.41)~>tar xf ceph-csi-3.4.0.tar.gz
root@k8s-master(192.168.199.41)~>cd ceph-csi-3.4.0/deploy/rbd/kubernetes/

1. Create the csi-config-map

cat <<EOF > csi-config-map.yaml
---
apiVersion: v1
kind: ConfigMap
data:
  config.json: |-
    [
      {
        "clusterID": "6bfa643e-def8-4482-8865-6eac0b2938c9",
        "monitors": [
          "192.168.199.44:6789",
          "192.168.199.45:6789",
          "192.168.199.46:6789"
        ]
      }
    ]
metadata:
  name: ceph-csi-config
EOF


###Notes###
"clusterID": "6bfa643e-def8-4482-8865-6eac0b2938c9"  comes from the fsid field in the output of ceph mon dump

The monitor addresses come from the mon entries in the same ceph mon dump output:
"monitors": [
  "192.168.199.44:6789",
  "192.168.199.45:6789",
  "192.168.199.46:6789"
]
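
A quick way to pull both values on a Ceph node (output omitted here):

ceph mon dump | grep fsid    #fsid -> clusterID
ceph mon dump                #the mon lines list the monitor addresses for "monitors"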

Apply the csi-config-map.yaml file

root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl apply -f  csi-config-map.yaml
configmap/ceph-csi-config created
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl get cm
NAME              DATA   AGE
ceph-csi-config   1      17s

2. Create the csi-kms-config

Create a ConfigMap named "ceph-csi-encryption-kms-config". Since no KMS is used, its config is empty (it still cannot be omitted, otherwise the containers will fail to start later).

cat <<EOF > csi-kms-config-map.yaml
---
apiVersion: v1
kind: ConfigMap
data:
  config.json: |-
    {}
metadata:
  name: ceph-csi-encryption-kms-config
EOF

Apply the csi-kms-config-map.yaml file

root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl apply -f  csi-kms-config-map.yaml
configmap/ceph-csi-encryption-kms-config created
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl get cm
NAME                             DATA   AGE
ceph-csi-config                  1      3m40s
ceph-csi-encryption-kms-config   1      27s

3. Create the Secret object that stores the key for accessing Ceph

cat <<EOF > csi-rbd-secret.yaml
---
apiVersion: v1
kind: Secret
metadata:
  name: csi-rbd-secret
  namespace: default
stringData:
  userID: kubernetes
  userKey: AQAFOxFlk67IBhAAVHoSfvgdUAGHd/f2mGFurw==
  encryptionPassphrase: test_passphrase
EOF


###Notes###
userID is the kubernetes user created in the Ceph cluster above
userKey is obtained as shown below; do NOT base64-encode it here, remember that!

root@ceph-node01(192.168.199.44)~/rbd-k8s-storageclass>ceph auth get client.kubernetes
[client.kubernetes]
        key = AQAFOxFlk67IBhAAVHoSfvgdUAGHd/f2mGFurw==
        caps mon = "profile rbd"
        caps osd = "profile rbd pool=kubernetes"
exported keyring for client.kubernetes

Apply the csi-rbd-secret.yaml file

root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl apply -f csi-rbd-secret.yaml
secret/csi-rbd-secret created
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl get secrets
NAME                  TYPE                                  DATA   AGE
ceph-rbd-secret       kubernetes.io/rbd                     1      139m
csi-rbd-secret        Opaque                                3      2s
default-token-zs2s2   kubernetes.io/service-account-token   3      6h14m

4. Configure the ServiceAccount and RBAC needed to access Kubernetes API objects

root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl apply -f  csi-provisioner-rbac.yaml
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl apply -f  csi-nodeplugin-rbac.yaml

5. Run the ceph-csi provisioner and node plugin

Before running this, note that this cluster only has three nodes, so the master also has to take part in running the CSI Pods.

Remove the taint from k8s-master:

root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl taint node k8s-master node-role.kubernetes.io/master-
node/k8s-master untainted

Run the ceph-csi provisioner and node plugin

root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl apply -f  csi-rbdplugin-provisioner.yaml -f csi-rbdplugin.yaml
service/csi-rbdplugin-provisioner created
deployment.apps/csi-rbdplugin-provisioner created
daemonset.apps/csi-rbdplugin created
service/csi-metrics-rbdplugin created

The two YAML files above are the core pieces; they create the Pods that do the work.

root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl get po -o wide
NAME                                         READY   STATUS    RESTARTS   AGE   IP               NODE         NOMINATED NODE   READINESS GATES
csi-rbdplugin-8v26m                          3/3     Running   0          35s   192.168.199.41   k8s-master   <none>           <none>
csi-rbdplugin-g6nnt                          3/3     Running   0          35s   192.168.199.43   k8s-node02   <none>           <none>
csi-rbdplugin-mmbmh                          3/3     Running   0          35s   192.168.199.42   k8s-node01   <none>           <none>
csi-rbdplugin-provisioner-67987db4f4-f9q4x   7/7     Running   0          35s   10.244.58.201    k8s-node02   <none>           <none>
csi-rbdplugin-provisioner-67987db4f4-lgzxc   7/7     Running   0          35s   10.244.85.195    k8s-node01   <none>           <none>
csi-rbdplugin-provisioner-67987db4f4-v4g7n   7/7     Running   0          35s   10.244.235.196   k8s-master   <none>           <none>

Create the StorageClass

For Kubernetes to create PVs on demand, a StorageClass API object is required, as follows:

cat <<EOF > storageclass.yaml
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
   name: csi-rbd-sc
provisioner: rbd.csi.ceph.com
parameters:
   clusterID: 3279e32e-9fb2-425c-8310-1fe9dad467da
   pool: kubernetes
   imageFeatures: layering
   csi.storage.k8s.io/provisioner-secret-name: csi-rbd-secret
   csi.storage.k8s.io/provisioner-secret-namespace: default
   csi.storage.k8s.io/controller-expand-secret-name: csi-rbd-secret
   csi.storage.k8s.io/controller-expand-secret-namespace: default
   csi.storage.k8s.io/node-stage-secret-name: csi-rbd-secret
   csi.storage.k8s.io/node-stage-secret-namespace: default
   csi.storage.k8s.io/fstype: xfs
reclaimPolicy: Delete
allowVolumeExpansion: true
mountOptions:
   - discard
EOF

###Notes###
clusterID: 3279e32e-9fb2-425c-8310-1fe9dad467da  is obtained via ceph mon dump | egrep fsid
pool: kubernetes  is the kubernetes pool created in the Ceph cluster above

Apply storageclass.yaml

root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl apply -f  storageclass.yaml
storageclass.storage.k8s.io/csi-rbd-sc created
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl get cm
NAME                             DATA   AGE
ceph-csi-config                  1      29m
ceph-csi-encryption-kms-config   1      26m
rbd.csi.ceph.com-default         0      2m30s
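
The StorageClass itself can be checked as well (output omitted; the csi-rbd-sc entry should be listed):

kubectl get sc csi-rbd-sc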

Create the PVC

cat <<EOF > pvc.yaml
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: rbd-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
  storageClassName: csi-rbd-sc
EOF

Apply pvc.yaml

root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl apply -f  pvc.yaml
persistentvolumeclaim/rbd-pvc created

Check the PV and PVC

root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl get pv
NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM             STORAGECLASS   REASON   AGE
pvc-972fe141-cc70-4b6d-a5de-4dbba407f3a3   1Gi        RWO            Delete           Bound    default/rbd-pvc   csi-rbd-sc              2s
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl get pvc
NAME      STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS   AGE
rbd-pvc   Bound    pvc-972fe141-cc70-4b6d-a5de-4dbba407f3a3   1Gi        RWO            csi-rbd-sc     5s

Users can now simply create a PVC, and the StorageClass provisions the PV automatically.
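
On the Ceph side it is worth confirming that the provisioner really created an image in the pool (run on a Ceph node; a csi-vol-* image should appear, as also seen in the cleanup below):

rbd ls -l kubernetes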

Create a Pod that mounts the PVC

cat <<EOF > ngx.yaml
---
apiVersion: v1
kind: Pod
metadata:
  labels:
    run: ngx
  name: ngx
spec:
  containers:
  - image: nginx:alpine
    name: ngx
    volumeMounts:
    - name: ceph-pvc
      mountPath: /usr/share/nginx/html
  dnsPolicy: ClusterFirst
  restartPolicy: Always
  volumes:
  - name: ceph-pvc
    persistentVolumeClaim:
      claimName: rbd-pvc
EOF
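
The original jumps straight to verification; the manifest still has to be applied first:

kubectl apply -f ngx.yaml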

Verify the Pod mount

1. Check the mount
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl exec -it ngx -- sh
/ # df -Th /usr/share/nginx/html/
Filesystem           Type            Size      Used Available Use% Mounted on
/dev/rbd0            xfs          1014.0M     32.4M    981.6M   3% /usr/share/nginx/html

2. Write data
/ # echo 'hello rbd.' > /usr/share/nginx/html/index.html

3. Test access from inside the container
/ # curl localhost
hello rbd.

4. Test access from outside the container
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl get po -o wide
NAME                                         READY   STATUS    RESTARTS   AGE    IP               NODE         NOMINATED NODE   READINESS GATES
csi-rbdplugin-8v26m                          3/3     Running   0          14m    192.168.199.41   k8s-master   <none>           <none>
csi-rbdplugin-g6nnt                          3/3     Running   0          14m    192.168.199.43   k8s-node02   <none>           <none>
csi-rbdplugin-mmbmh                          3/3     Running   0          14m    192.168.199.42   k8s-node01   <none>           <none>
csi-rbdplugin-provisioner-67987db4f4-f9q4x   7/7     Running   0          14m    10.244.58.201    k8s-node02   <none>           <none>
csi-rbdplugin-provisioner-67987db4f4-lgzxc   7/7     Running   0          14m    10.244.85.195    k8s-node01   <none>           <none>
csi-rbdplugin-provisioner-67987db4f4-v4g7n   7/7     Running   0          14m    10.244.235.196   k8s-master   <none>           <none>
ngx                                          1/1     Running   0          115s   10.244.58.202    k8s-node02   <none>           <none>
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>curl 10.244.58.202
hello rbd.

5. Delete the Pod, recreate it, and mount the PVC again
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl delete -f  ngx.yaml
pod "ngx" deleted
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl apply -f  ngx.yaml
pod/ngx created

6. Test access again
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl get po -o wide
NAME                                         READY   STATUS    RESTARTS   AGE   IP               NODE         NOMINATED NODE   READINESS GATES
csi-rbdplugin-8v26m                          3/3     Running   0          15m   192.168.199.41   k8s-master   <none>           <none>
csi-rbdplugin-g6nnt                          3/3     Running   0          15m   192.168.199.43   k8s-node02   <none>           <none>
csi-rbdplugin-mmbmh                          3/3     Running   0          15m   192.168.199.42   k8s-node01   <none>           <none>
csi-rbdplugin-provisioner-67987db4f4-f9q4x   7/7     Running   0          15m   10.244.58.201    k8s-node02   <none>           <none>
csi-rbdplugin-provisioner-67987db4f4-lgzxc   7/7     Running   0          15m   10.244.85.195    k8s-node01   <none>           <none>
csi-rbdplugin-provisioner-67987db4f4-v4g7n   7/7     Running   0          15m   10.244.235.196   k8s-master   <none>           <none>
ngx                                          1/1     Running   0          23s   10.244.85.196    k8s-node01   <none>           <none>

root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>curl 10.244.85.196
hello rbd.

The test shows the data really is persisted on the Ceph RBD.

Clean up the Kubernetes resources and Ceph

1. Remove the ceph-csi resources

root@k8s-master(192.168.199.41)~>cd ceph-csi-3.4.0/deploy/rbd/kubernetes/
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl delete -f ./

2. Delete the PV

root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl get pv
NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS     CLAIM             STORAGECLASS   REASON   AGE
pvc-972fe141-cc70-4b6d-a5de-4dbba407f3a3   1Gi        RWO            Delete           Released   default/rbd-pvc   csi-rbd-sc              30m
root@k8s-master(192.168.199.41)~/ceph-csi-3.4.0/deploy/rbd/kubernetes>kubectl delete pv pvc-972fe141-cc70-4b6d-a5de-4dbba407f3a3
persistentvolume "pvc-972fe141-cc70-4b6d-a5de-4dbba407f3a3" deleted

3. Clean up the Ceph cluster

root@ceph-node01(192.168.199.44)~>cd /etc/ceph/

1. Delete the RBD image
root@ceph-node01(192.168.199.44)/etc/ceph>rbd ls -l kubernetes
NAME                                         SIZE  PARENT FMT PROT LOCK
csi-vol-3ae611a8-5b81-11ee-b587-0aeff6426f30 1 GiB          2

root@ceph-node01(192.168.199.44)/etc/ceph>rbd rm kubernetes/csi-vol-3ae611a8-5b81-11ee-b587-0aeff6426f30
Removing image: 100% complete...done.

2. Delete the pool
root@ceph-node01(192.168.199.44)/etc/ceph>ceph osd pool rm kubernetes kubernetes --yes-i-really-really-mean-it
pool 'kubernetes' removed

This still needs to be verified with a fresh install: whether the Pod hunting for the Ceph config file was caused by a misconfiguration, or whether the config really is supposed to be copied to the Kubernetes nodes.

Note: when RBD is used as a PV (the in-tree approach), the Ceph configuration file and keyring need to be pushed to every Kubernetes node, otherwise errors like the following appear:

Events:
  Type     Reason                  Age   From                     Message
  ----     ------                  ----  ----                     -------
  Normal   Scheduled               17s   default-scheduler        Successfully assigned default/ceph-rbd-pv-pod to k8s-node02
  Normal   SuccessfulAttachVolume  17s   attachdetach-controller  AttachVolume.Attach succeeded for volume "ceph-rbd-pv"
  Warning  FailedMount             16s   kubelet                  MountVolume.WaitForAttach failed for volume "ceph-rbd-pv" : fail to check rbd image status with: (exit status 1), rbd output: (did not load config file, using default settings.
2023-09-25 11:49:59.920 7f5b1c38dc80 -1 Errors while parsing config file!
2023-09-25 11:49:59.920 7f5b1c38dc80 -1 parse_file: cannot open /etc/ceph/ceph.conf: (2) No such file or directory
2023-09-25 11:49:59.920 7f5b1c38dc80 -1 parse_file: cannot open /.ceph/ceph.conf: (2) No such file or directory
2023-09-25 11:49:59.920 7f5b1c38dc80 -1 parse_file: cannot open ceph.conf: (2) No such file or directory
2023-09-25 11:49:59.920 7f5b1c38dc80 -1 Errors while parsing config file!
2023-09-25 11:49:59.920 7f5b1c38dc80 -1 parse_file: cannot open /etc/ceph/ceph.conf: (2) No such file or directory
2023-09-25 11:49:59.920 7f5b1c38dc80 -1 parse_file: cannot open /.ceph/ceph.conf: (2) No such file or directory
2023-09-25 11:49:59.920 7f5b1c38dc80 -1 parse_file: cannot open ceph.conf: (2) No such file or directory
2023-09-25 11:49:59.944 7f5b1c38dc80 -1 auth: unable to find a keyring on /etc/ceph/ceph.client.ks8_pv.keyring,/etc/ceph/ceph.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin,: (2) No such file or directory
2023-09-25 11:49:59.945 7f5b1c38dc80 -1 auth: unable to find a keyring on /etc/ceph/ceph.client.ks8_pv.keyring,/etc/ceph/ceph.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin,: (2) No such file or directory
2023-09-25 11:49:59.945 7f5b1c38dc80 -1 auth: unable to find a keyring on /etc/ceph/ceph.client.ks8_pv.keyring,/etc/ceph/ceph.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin,: (2) No such file or directory
rbd: couldn't connect to the cluster!

Push the Ceph configuration to the Kubernetes nodes:

1. Declare the hostname-to-IP mappings in /etc/hosts
root@ceph-node01(192.168.199.44)/etc/ceph>cat /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.199.41  k8s-master
192.168.199.42  k8s-node01
192.168.199.43  k8s-node02
192.168.199.44  ceph-node01
192.168.199.45  ceph-node02
192.168.199.46  ceph-node03

2. Push the Ceph configuration file and keyring
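
The original leaves this step empty; a minimal sketch, assuming the nodes need ceph.conf plus the keyring of the client user referenced by the PV (adjust the keyring name to the user you actually created):

#export the client keyring, then copy it and ceph.conf to every Kubernetes node
ceph auth get client.kubernetes -o /etc/ceph/ceph.client.kubernetes.keyring
for node in k8s-master k8s-node01 k8s-node02; do
  scp /etc/ceph/ceph.conf /etc/ceph/ceph.client.kubernetes.keyring ${node}:/etc/ceph/
done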


Kubernetes - Ceph File Storage (CephFS)

RBD cannot be used across nodes. In Kubernetes, stateless Pods managed by Deployments are far more common, and they can be rebuilt and drift to other nodes at any time. For those scenarios, CephFS looks like the better fit for Kubernetes.

Ceph RBD only supports mounting by multiple Pods on the same node, whereas CephFS supports mounting by Pods across nodes, providing shared access.

Integrating Kubernetes Volumes with CephFS

Create the file storage used by the Kubernetes volume

Note: run on ceph-node01

  1. Create the MDS daemons
root@localhost(192.168.199.44)/etc/ceph>ceph-deploy mds create ceph-node0{1..3}
  2. Create the pools

A Ceph file system needs at least two RADOS pools: one for data and one for metadata.

1. Create the cephfs_data pool
root@ceph-node01(192.168.199.44)/etc/ceph> ceph osd pool create cephfs_data 128
pool 'cephfs_data' created

2. Create the cephfs_metadata pool
root@ceph-node01(192.168.199.44)/etc/ceph> ceph osd pool create cephfs_metadata 64
pool 'cephfs_metadata' created

3. List the pools
root@ceph-node01(192.168.199.44)/etc/ceph> ceph osd pool ls
cephfs_data
cephfs_metadata

4. Create the CephFS file system
root@ceph-node01(192.168.199.44)/etc/ceph> ceph fs new cephfs cephfs_metadata cephfs_data
new fs with metadata pool 2 and data pool 1

5. Set the maximum number of active MDS daemons
root@ceph-node01(192.168.199.44)/etc/ceph> ceph fs set cephfs max_mds 2

6. Check the file system
root@ceph-node01(192.168.199.44)/etc/ceph> ceph fs ls
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]

Create the auth user Kubernetes uses to access CephFS

Note: run on ceph-node01

Kubernetes must go through an authenticated user to access CephFS in Ceph; without one, it cannot access the Ceph cluster.

Command format: ceph auth get-or-create {username} mon '{mon caps}' mds '{mds caps}' osd '{osd caps}'

ceph auth get-or-create client.cephfs mon 'allow r' mds 'allow rw' osd 'allow rw pool=cephfs_data, allow rw pool=cephfs_metadata'

ceph auth get  client.cephfs
[client.cephfs]
        key = AQBs2U1lK+uHKBAAGTEH5fSx5mjBPoH87CxHIQ==
        caps mds = "allow rw"
        caps mon = "allow r"
        caps osd = "allow rw pool=cephfs_data, allow rw pool=cephfs_metadata"
exported keyring for client.cephfs

Test-mount locally and create the directories

Note: run on ceph-node01

1. Create the mount point and mount CephFS
root@ceph-node01(192.168.199.44)/root> mkdir -p /mnt/cephfs
root@ceph-node01(192.168.199.44)/root> mount.ceph ceph-node01:6789:/ /mnt/cephfs/ -o name=cephfs,secret=AQBs2U1lK+uHKBAAGTEH5fSx5mjBPoH87CxHIQ==

2. Create a directory
root@ceph-node01(192.168.199.44)/root> mkdir -p /mnt/cephfs/html

3. Create an index page
root@localhost(192.168.199.44)/root>echo "$(date) hello cephfs." > /mnt/cephfs/html/index.html

Store the authenticated user's key in a Kubernetes Secret

  1. Base64-encode the key
root@ceph-node01(192.168.199.44)~>ceph auth get-key client.cephfs | base64
QVFCczJVMWxLK3VIS0JBQUdURUg1ZlN4NW1qQlBvSDg3Q3hISVE9PQ==
  2. Store the encoded key in a Secret
root@k8s-master(192.168.199.41)~>mkdir -pv manifests
mkdir: created directory ‘manifests’
root@k8s-master(192.168.199.41)~>cd manifests/
root@k8s-master(192.168.199.41)~/manifests>vim cephfs-secret.yaml
apiVersion: v1
kind: Secret
metadata:
  name: cephfs-secret
data:
  key: QVFCczJVMWxLK3VIS0JBQUdURUg1ZlN4NW1qQlBvSDg3Q3hISVE9PQ==
  3. Create the Secret resource

The Secret must live in the same namespace as the Pod; if Pods in several namespaces need CephFS storage, the Secret has to be created in each of those namespaces.

root@k8s-master(192.168.199.41)/root/manifests> kubectl apply -f cephfs-secret.yaml
secret/cephfs-secret created
root@k8s-master(192.168.199.41)/root/manifests> kubectl get secret
NAME                  TYPE                                  DATA   AGE
cephfs-secret         Opaque                                1      4s
default-token-hp7gb   kubernetes.io/service-account-token   3      48m

Install ceph-common on every Kubernetes node

ceph-common provides the Ceph command-line tools; it must be installed on every node, otherwise the node cannot run the commands needed to operate on Ceph.

root@k8s-master(192.168.199.41)~>yum install -y ceph-common
root@k8s-node01(192.168.199.42)~>yum install -y ceph-common
root@k8s-node02(192.168.199.43)~>yum install -y ceph-common

Create a Pod that mounts CephFS for data persistence

  1. Write the manifest
root@k8s-master(192.168.199.41)~>cd manifests/

root@k8s-master(192.168.199.41)/root/manifests> cat pod-ngx.yaml
apiVersion: v1
kind: Pod
metadata:
  labels:
    run: ngx
  name: ngx
spec:
  containers:
  - image: nginx:alpine
    name: ngx
    volumeMounts:
    - name: html
      mountPath: /usr/share/nginx/html
  restartPolicy: Always
  volumes:
  - name: html
    cephfs:
      monitors:
      - 192.168.199.44:6789
      - 192.168.199.45:6789
      - 192.168.199.46:6789
      path: /html
      user: cephfs
      secretRef:
        name: cephfs-secret
  2. Create the Pod
root@k8s-master(192.168.199.41)/root/manifests> kubectl apply  -f pod-ngx.yaml
pod/ngx created

Check the created Pod
root@k8s-master(192.168.199.41)/root/manifests> kubectl get po -o wide
NAME   READY   STATUS    RESTARTS   AGE   IP              NODE         NOMINATED NODE   READINESS GATES
ngx    1/1     Running   0          74s   10.244.85.196   k8s-node01   <none>           <none>




Check the creation events
root@k8s-master(192.168.199.41)~/manifests>kubectl describe po ngx
...
Volumes:
  html:
    Type:        CephFS (a CephFS mount on the host that shares a pod's lifetime)
    Monitors:    [192.168.199.44:6789 192.168.199.45:6789 192.168.199.46:6789]
    Path:        /html
    User:        cephfs
    SecretFile:
    SecretRef:   &LocalObjectReference{Name:cephfs-secret,}
    ReadOnly:    false
...
Events:
  Type    Reason     Age   From               Message
  ----    ------     ----  ----               -------
  Normal  Scheduled  7s    default-scheduler  Successfully assigned default/ngx to k8s-node01
  Normal  Pulled     6s    kubelet            Container image "nginx:alpine" already present on machine
  Normal  Created    6s    kubelet            Created container ngx
  Normal  Started    5s    kubelet            Started container ngx

Inspect the CephFS mount used by the Pod

1. Access the Pod directly

root@k8s-master(192.168.199.41)/root/manifests> kubectl get po -o wide
NAME   READY   STATUS    RESTARTS   AGE    IP              NODE         NOMINATED NODE   READINESS GATES
ngx    1/1     Running   0          108s   10.244.85.197   k8s-node01   <none>           <none>
root@k8s-master(192.168.199.41)/root/manifests> curl 10.244.85.197
Fri Nov 10 16:07:59 CST 2023 hello cephfs.

2. Inspect the CephFS mount on the host

The reason the Pod sees the CephFS mount is that the host mounts the CephFS directory into a path inside the container.

First find which node the Pod is running on, then look at the CephFS mount on that node.

root@k8s-master(192.168.199.41)/root/manifests> kubectl get po -o wide
NAME   READY   STATUS    RESTARTS   AGE     IP              NODE         NOMINATED NODE   READINESS GATES
ngx    1/1     Running   0          3m30s   10.244.85.197   k8s-node01   <none>           <none>

root@k8s-node01(192.168.199.42)~>df -Th | egrep html
192.168.199.44:6789,192.168.199.45:6789,192.168.199.46:6789:/html ceph      8.5G     0  8.5G   0% /var/lib/kubelet/pods/a6df1c7c-9d13-4b8e-a8ed-18e1af363f58/volumes/kubernetes.io~cephfs/html
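
As an extra check (not part of the original transcript; output will vary), the same mount can be confirmed from inside the Pod:

# the container path should sit on the same ceph filesystem as the host mount above
kubectl exec ngx -- df -h /usr/share/nginx/html

# and the index.html written earlier on the ceph node should be readable here
kubectl exec ngx -- cat /usr/share/nginx/html/index.html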

3. Observe the CephFS mount on the Node after the Pod is deleted

Once the Pod resource is deleted, the CephFS mount on the Node is unmounted automatically.

Delete the pod on the master
root@k8s-master(192.168.199.41)~/manifests>kubectl delete -f pod-ngx.yaml
pod "ngx" deleted

Check the mount again on the node
root@k8s-node01(192.168.199.42)~>df -Th | egrep html # gone

4. What happens when multiple Pods use the same CephFS at the same time

CephFS supports being mounted by multiple Pods across nodes, so the data is shared; creating several ngx Pods on top of it gives a load-balancing-like effect (a Deployment-based sketch of the same idea follows the example below).

root@k8s-master(192.168.199.41)/root/manifests> kubectl apply -f pod-ngx.yaml -f pod-ngx-1.yaml
pod/ngx created
pod/ngx-1 created

root@k8s-master(192.168.199.41)/root/manifests> kubectl get po -o wide
NAME    READY   STATUS    RESTARTS   AGE   IP              NODE         NOMINATED NODE   READINESS GATES
ngx     1/1     Running   0          45s   10.244.85.198   k8s-node01   <none>           <none>
ngx-1   1/1     Running   0          45s   10.244.58.194   k8s-node02   <none>           <none>

root@k8s-master(192.168.199.41)/root/manifests> curl 10.244.85.198
Fri Nov 10 16:07:59 CST 2023 hello cephfs.

root@k8s-master(192.168.199.41)/root/manifests> curl 10.244.58.194
Fri Nov 10 16:07:59 CST 2023 hello cephfs.
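
For real load balancing across the shared CephFS data, the same volume can be referenced from a Deployment and fronted by a Service instead of hand-made Pods. A minimal sketch under the same assumptions (the cephfs-secret Secret and the /html directory created earlier); the ngx-shared names are made up for illustration:

apiVersion: apps/v1
kind: Deployment
metadata:
  name: ngx-shared
spec:
  replicas: 3
  selector:
    matchLabels:
      run: ngx-shared
  template:
    metadata:
      labels:
        run: ngx-shared
    spec:
      containers:
      - image: nginx:alpine
        name: ngx
        volumeMounts:
        - name: html
          mountPath: /usr/share/nginx/html
      volumes:
      - name: html
        cephfs:                 # every replica mounts the same CephFS path
          monitors:
          - 192.168.199.44:6789
          - 192.168.199.45:6789
          - 192.168.199.46:6789
          path: /html
          user: cephfs
          secretRef:
            name: cephfs-secret
---
apiVersion: v1
kind: Service                   # spreads requests across the replicas
metadata:
  name: ngx-shared
spec:
  selector:
    run: ngx-shared
  ports:
  - port: 80
    targetPort: 80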

Clean up the k8s Pod resources and CephFS

Note: this cleans up everything demonstrated in this chapter, in preparation for the next one.

1. Clean up the Pod resources in the k8s cluster

root@k8s-master(192.168.199.41)~/manifests>kubectl get po -o wide
NAME    READY   STATUS    RESTARTS   AGE   IP              NODE         NOMINATED NODE   READINESS GATES
ngx     1/1     Running   0          41s   10.244.85.198   k8s-node01   <none>           <none>
ngx-1   1/1     Running   0          41s   10.244.58.194   k8s-node02   <none>           <none>
root@k8s-master(192.168.199.41)/root/manifests> kubectl delete -f pod-ngx.yaml -f pod-ngx-1.yaml
pod "ngx" deleted
pod "ngx-1" deleted

2. Clean up the Secret

root@k8s-master(192.168.199.41)/root/manifests> kubectl get secret
NAME                  TYPE                                  DATA   AGE
cephfs-secret         Opaque                                1      51m
default-token-hp7gb   kubernetes.io/service-account-token   3      100m
root@k8s-master(192.168.199.41)/root/manifests> kubectl delete secret cephfs-secret
secret "cephfs-secret" deleted

3. Clean up CephFS and its pools

1. List the cephfs
root@ceph-node01(192.168.199.44)/root> ceph fs ls
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]


2. Stop the mds services
root@ceph-node01(192.168.199.44)/root> systemctl stop ceph-mds.target
root@ceph-node01(192.168.199.45)/root> systemctl stop ceph-mds.target
root@ceph-node01(192.168.199.46)/root> systemctl stop ceph-mds.target

3. Delete the cephfs
root@ceph-node01(192.168.199.44)/root> ceph fs rm cephfs --yes-i-really-mean-it

4. Check again
root@ceph-node01(192.168.199.44)/root> ceph fs ls
No filesystems enabled

5. Delete the pools
root@ceph-node01(192.168.199.44)/root> ceph osd pool rm cephfs_metadata cephfs_metadata --yes-i-really-really-mean-it
pool 'cephfs_metadata' removed
root@ceph-node01(192.168.199.44)/root> ceph osd pool rm cephfs_data cephfs_data  --yes-i-really-really-mean-it
pool 'cephfs_data' removed
root@ceph-node01(192.168.199.44)/root> ceph osd pool ls

4. Delete the auth user

root@ceph-node01(192.168.199.44)/root> ceph auth get  client.cephfs
[client.cephfs]
        key = AQBs2U1lK+uHKBAAGTEH5fSx5mjBPoH87CxHIQ==
        caps mds = "allow rw"
        caps mon = "allow r"
        caps osd = "allow rw pool=cephfs_data, allow rw pool=cephfs_metadata"
exported keyring for client.cephfs

root@ceph-node01(192.168.199.44)/root> ceph auth rm   client.cephfs
updated

Integrating K8S PV volumes with CephFS

Create the file storage used by the k8s volume

Note: run on ceph-node01

  1. Create the MDS
root@localhost(192.168.199.44)/etc/ceph>ceph-deploy mds create ceph-node0{1..3}
  2. Create the storage pools

A Ceph file system needs at least two RADOS pools: one for data and one for metadata.

1. Create the cephfs_data pool
root@ceph-node01(192.168.199.44)/etc/ceph> ceph osd pool create cephfs_data 128
pool 'cephfs_data' created

2. Create the cephfs_metadata pool
root@ceph-node01(192.168.199.44)/etc/ceph> ceph osd pool create cephfs_metadata 64
pool 'cephfs_metadata' created

3. List the pools
root@ceph-node01(192.168.199.44)/etc/ceph> ceph osd pool ls
cephfs_data
cephfs_metadata

4. Create the cephfs
root@ceph-node01(192.168.199.44)/etc/ceph> ceph fs new cephfs cephfs_metadata cephfs_data
new fs with metadata pool 2 and data pool 1

5. Set the maximum number of active MDS daemons
root@ceph-node01(192.168.199.44)/etc/ceph> ceph fs set cephfs max_mds 2

6. Check the cephfs
root@ceph-node01(192.168.199.44)/etc/ceph> ceph fs ls
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]

Create the auth user k8s uses to access CephFS

Note: run on ceph-node01

For K8S to access CephFS in Ceph, it must authenticate as a Ceph user; without an auth user, the Ceph cluster cannot be accessed.

Command format: ceph auth get-or-create {user name} mon '{mon caps}' mds '{mds caps}' osd '{osd caps}'

ceph auth get-or-create client.cephfs mon 'allow r' mds 'allow rw' osd 'allow rw pool=cephfs_data, allow rw pool=cephfs_metadata'

ceph auth get  client.cephfs
[client.cephfs]
        key = AQDB701lKSe7AhAAqw3/xa/ZShOwQdeiNCa01w==
        caps mds = "allow rw"
        caps mon = "allow r"
        caps osd = "allow rw pool=cephfs_data, allow rw pool=cephfs_metadata"
exported keyring for client.cephfs

Test-mount locally and create the directory

Note: run on ceph-node01

1. Mount the cephfs
root@ceph-node01(192.168.199.44)/root> mount.ceph ceph-node01:6789:/ /mnt/cephfs/ -o name=cephfs,secret=AQDB701lKSe7AhAAqw3/xa/ZShOwQdeiNCa01w==

2. Create the directory
root@ceph-node01(192.168.199.44)/root> mkdir -p /mnt/cephfs/html

3. Create the index page
root@localhost(192.168.199.44)/root>echo "$(date) hello cephfs." > /mnt/cephfs/html/index.html

Store the auth user's key in a k8s Secret resource

  1. Encode the key with base64
root@ceph-node01(192.168.199.44)~>ceph auth get-key client.cephfs | base64
QVFEQjcwMWxLU2U3QWhBQXF3My94YS9aU2hPd1FkZWlOQ2EwMXc9PQ==
  2. Store the base64-encoded key in a Secret
root@k8s-master(192.168.199.41)~>mkdir -pv manifests
mkdir: created directory ‘manifests’
root@k8s-master(192.168.199.41)~>cd manifests/
root@k8s-master(192.168.199.41)~/manifests>vim cephfs-secret.yaml
apiVersion: v1
kind: Secret
metadata:
  name: cephfs-secret
data:
  key: QVFEQjcwMWxLU2U3QWhBQXF3My94YS9aU2hPd1FkZWlOQ2EwMXc9PQ==
  3. Create the Secret resource

The CephFS Secret must live in the same Namespace as the Pods that use it. If Pods in different Namespaces all need CephFS storage, the Secret has to be created in each of those Namespaces.

root@k8s-master(192.168.199.41)/root/manifests> kubectl apply -f cephfs-secret.yaml
secret/cephfs-secret created
root@k8s-master(192.168.199.41)/root/manifests> kubectl get secret
NAME                  TYPE                                  DATA   AGE
cephfs-secret         Opaque                                1      4s
default-token-hp7gb   kubernetes.io/service-account-token   3      48m

Install ceph-common on every node in the k8s cluster

ceph-common provides the ceph command-line tools. It must be installed on every node; otherwise the nodes cannot run the commands needed to talk to Ceph.

root@k8s-master(192.168.199.41)~>yum install -y ceph-common
root@k8s-node01(192.168.199.42)~>yum install -y ceph-common
root@k8s-node02(192.168.199.43)~>yum install -y ceph-common

Create PV and PVC resources that use CephFS as file storage

Create PV and PVC storage resources in the K8S cluster; the configuration mainly lives on the PV, whose backing storage is the Ceph cluster's cephfs.

1. Write the resource manifest

root@k8s-master(192.168.199.41)/root/manifests/cephfs-pv> cat cephfs-pv-pvc.yaml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: cephfs-pv
  namespace: default
spec:
  capacity:
    storage: 1Gi
  accessModes:
  - ReadWriteMany
  cephfs:
    monitors:
    - 192.168.199.44:6789
    - 192.168.199.45:6789
    - 192.168.199.46:6789
    path: /
    user: cephfs
    secretRef:
      name: cephfs-secret
  persistentVolumeReclaimPolicy: Recycle
  storageClassName: cephfs
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: cephfs-pvc
  namespace: default
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
  volumeName: cephfs-pv
  storageClassName: cephfs
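
A side note, not from the original article: the Recycle reclaim policy is deprecated upstream and is only implemented for the NFS and HostPath plugins, so the recycle step cannot actually run against a CephFS PV after release. If that matters, Retain is the safer choice for the PV above:

  # alternative reclaim policy for the PV above (manual cleanup of the data is then assumed)
  persistentVolumeReclaimPolicy: Retain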

2. Create the pv and pvc in the cluster

1. Create the resources
root@k8s-master(192.168.199.41)/root/manifests/cephfs-pv> kubectl apply -f cephfs-pv-pvc.yaml
persistentvolume/cephfs-pv created
persistentvolumeclaim/cephfs-pvc created

2. View the created resources
root@k8s-master(192.168.199.41)/root/manifests/cephfs-pv> kubectl get pv,pvc
NAME                         CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                STORAGECLASS   REASON   AGE
persistentvolume/cephfs-pv   1Gi        RWX            Recycle          Bound    default/cephfs-pvc   cephfs                  12s

NAME                               STATUS   VOLUME      CAPACITY   ACCESS MODES   STORAGECLASS   AGE
persistentvolumeclaim/cephfs-pvc   Bound    cephfs-pv   1Gi        RWX            cephfs         12s

Create a Pod resource that mounts the PV volume and serves its data

1. Write the Pod manifest

root@k8s-master(192.168.199.41)/root/manifests/cephfs-pv> cat cephfs-pod.yaml
apiVersion: v1
kind: Pod
metadata:
  labels:
    run: ngx
  name: ngx
spec:
  containers:
  - image: nginx:alpine
    name: ngx
    volumeMounts:
    - name: html
      mountPath: /usr/share/nginx/html
  restartPolicy: Always
  volumes:
  - name: html
    persistentVolumeClaim:
      claimName: cephfs-pvc
      readOnly: false

2. Create the Pod resource in the cluster

root@k8s-master(192.168.199.41)/root/manifests/cephfs-pv> kubectl apply -f cephfs-pod.yaml
pod/ngx created

3. Access the page directly

root@k8s-master(192.168.199.41)/root/manifests/cephfs-pv> kubectl get po -o wide
NAME   READY   STATUS    RESTARTS   AGE   IP              NODE         NOMINATED NODE   READINESS GATES
ngx    1/1     Running   0          58s   10.244.85.199   k8s-node01   <none>           <none>
root@k8s-master(192.168.199.41)/root/manifests/cephfs-pv> curl 10.244.85.199
Fri Nov 10 17:12:13 CST 2023 hello cephfs.

Clean up the k8s Pod resources and CephFS

Note: this cleans up everything demonstrated in this chapter, in preparation for the next one.

1. Clean up the Pod resources in the k8s cluster

root@k8s-master(192.168.199.41)/root> cd manifests/cephfs-pv/
root@k8s-master(192.168.199.41)/root/manifests/cephfs-pv> kubectl delete -f cephfs-pod.yaml
pod "ngx" deleted

2. Clean up the pvc and pv

1. Delete the pvc
root@k8s-master(192.168.199.41)/root/manifests/cephfs-pv> kubectl delete pvc cephfs-pvc
persistentvolumeclaim "cephfs-pvc" deleted

2. Delete the pv
root@k8s-master(192.168.199.41)/root/manifests/cephfs-pv> kubectl delete pv cephfs-pv
persistentvolume "cephfs-pv" deleted

3. Clean up the cephfs and its pools

1. Stop all mds services
root@ceph-node01(192.168.199.44)/root> systemctl stop ceph-mds.target
root@ceph-node01(192.168.199.45)/root> systemctl stop ceph-mds.target
root@ceph-node01(192.168.199.46)/root> systemctl stop ceph-mds.target

2. Delete the cephfs
root@ceph-node01(192.168.199.44)/root> ceph fs rm cephfs --yes-i-really-mean-it

3. Delete the pools
root@ceph-node01(192.168.199.44)/root> ceph osd pool rm cephfs_data cephfs_data  --yes-i-really-really-mean-it
pool 'cephfs_data' removed

root@ceph-node01(192.168.199.44)/root> ceph osd pool rm cephfs_metadata cephfs_metadata  --yes-i-really-really-mean-it
pool 'cephfs_metadata' removed

4. Delete the auth user

root@ceph-node01(192.168.199.44)/etc/ceph>ceph auth rm client.cephfs
updated

Integrating a K8S StorageClass with CephFS

Create the file storage used by the k8s volume

Note: run on ceph-node01

Create the storage pools

A Ceph file system needs at least two RADOS pools: one for data and one for metadata.

1. Create the cephfs_data pool
root@ceph-node01(192.168.199.44)/etc/ceph> ceph osd pool create cephfs_data 128
pool 'cephfs_data' created

2. Create the cephfs_metadata pool
root@ceph-node01(192.168.199.44)/etc/ceph> ceph osd pool create cephfs_metadata 64
pool 'cephfs_metadata' created

3. List the pools
root@ceph-node01(192.168.199.44)/etc/ceph> ceph osd pool ls
cephfs_data
cephfs_metadata

4. Create the cephfs
root@ceph-node01(192.168.199.44)/etc/ceph> ceph fs new cephfs cephfs_metadata cephfs_data
new fs with metadata pool 2 and data pool 1

5. Set the maximum number of active MDS daemons
root@ceph-node01(192.168.199.44)/etc/ceph> ceph fs set cephfs max_mds 2

6. Check the cephfs
root@ceph-node01(192.168.199.44)/etc/ceph> ceph fs ls
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]

Create the auth user k8s uses to access CephFS

Note: run on ceph-node01

For K8S to access CephFS in Ceph, it must authenticate as a Ceph user; without an auth user, the Ceph cluster cannot be accessed.

Command format: ceph auth get-or-create {user name} mon '{mon caps}' mds '{mds caps}' osd '{osd caps}'

ceph auth get-or-create client.cephfs mon 'allow r' mds 'allow rw' osd 'allow rw pool=cephfs_data, allow rw pool=cephfs_metadata'

ceph auth get  client.cephfs
[client.cephfs]
        key = AQDW9k1lDBtfBRAAP7aBGqZMXSXlNeWbmQNAoQ==
        caps mds = "allow rw"
        caps mon = "allow r"
        caps osd = "allow rw pool=cephfs_data, allow rw pool=cephfs_metadata"
exported keyring for client.cephfs

Test-mount locally and create the index file

Note: run on ceph-node01

1. Mount the cephfs
root@ceph-node01(192.168.199.44)/root> mount.ceph ceph-node01:6789:/ /mnt/cephfs/ -o name=cephfs,secret=AQDW9k1lDBtfBRAAP7aBGqZMXSXlNeWbmQNAoQ==
2. Create the index page
root@localhost(192.168.199.44)/root>echo "$(date) hello cephfs." > /mnt/cephfs/index.html

Store the auth user's key in a k8s Secret resource

  1. Encode the key with base64
root@ceph-node01(192.168.199.44)~>ceph auth get-key client.cephfs | base64
QVFEVzlrMWxEQnRmQlJBQVA3YUJHcVpNWFNYbE5lV2JtUU5Bb1E9PQ==

Install ceph-common on every node in the k8s cluster

ceph-common provides the ceph command-line tools. It must be installed on every node; otherwise the nodes cannot run the commands needed to talk to Ceph.

root@k8s-master(192.168.199.41)~>yum install -y ceph-common
root@k8s-node01(192.168.199.42)~>yum install -y ceph-common
root@k8s-node02(192.168.199.43)~>yum install -y ceph-common

Deploy ceph-csi

ceph-csi link: https://github.com/ceph/ceph-csi/tree/devel/deploy

A StorageClass lets PV resources be provisioned automatically, on demand, from users' PVC requests.

Here the StorageClass uses Ceph as the underlying storage and provisions PVs for users automatically. The client tool is csi, so the csi components must first be deployed in the K8S cluster; the driver inside the csi client then connects to the Ceph cluster.

Note: when deploying ceph-csi, pay close attention to the version; the ceph-csi release to use depends on the ceph and k8s versions.

Versions used this time:

ceph: nautilus
k8s: v1.19.7
ceph-csi: 3.4.0

ceph-csi download link: https://github.com/ceph/ceph-csi/archive/refs/tags/v3.4.0.tar.gz

root@k8s-master(192.168.199.41)~>tar xf ceph-csi-3.4.0.tar.gz
root@k8s-master(192.168.199.41)/root> cd ceph-csi-3.4.0/deploy/cephfs/kubernetes/
  1. Edit csi-config-map.yaml
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/deploy/cephfs/kubernetes> vim csi-config-map.yaml

---
apiVersion: v1
kind: ConfigMap
data:
  config.json: |-
    [
      {
        "clusterID": "212d53e8-9766-44a4-9cfd-d9ca6ae44882",	//通过 ceph mon dump 查看fsid
        "monitors": [
          "192.168.199.44:6789",	//所有ceph mon节点
          "192.168.199.45:6789",
          "192.168.199.46:6789"
        ]
      }
    ]
metadata:
  name: ceph-csi-config
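
The clusterID and monitor addresses above can be read straight from the Ceph cluster. A small hedged helper (run on any ceph node with the admin keyring; output format is from a nautilus cluster):

ceph mon dump | grep fsid   # the fsid is the value for clusterID
ceph mon dump               # the full dump also lists each mon's v1 address for the monitors array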
  2. Let the k8s-master node take part in Pod scheduling

This only applies to a three-node cluster like this one; if there are three or more k8s worker nodes, skip this step. It is needed here because the csi-cephfs components have to be scheduled onto three nodes.

root@k8s-master(192.168.199.41)/root> kubectl taint nodes k8s-master node-role.kubernetes.io/master:NoSchedule-
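
An optional sanity check (not from the original run) before deploying the csi manifests, to confirm the master can now be scheduled:

# should report "Taints: <none>" for k8s-master once the NoSchedule taint is removed
kubectl describe node k8s-master | grep Taints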
  3. Deploy the cephfs csi
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/deploy/cephfs/kubernetes> kubectl apply -f csi-config-map.yaml
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/deploy/cephfs/kubernetes> kubectl apply -f csi-provisioner-rbac.yaml
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/deploy/cephfs/kubernetes> kubectl apply -f csi-nodeplugin-rbac.yaml
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/deploy/cephfs/kubernetes> kubectl apply -f csi-cephfsplugin-provisioner.yaml
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/deploy/cephfs/kubernetes> kubectl apply -f csi-cephfsplugin.yaml

Check that the started Pods are healthy

root@k8s-master(192.168.199.41)/root> kubectl get po -o wide
NAME                                            READY   STATUS    RESTARTS   AGE     IP               NODE         NOMINATED NODE   READINESS GATES
csi-cephfsplugin-6p5vt                          3/3     Running   0          3m53s   192.168.199.41   k8s-master   <none>           <none>
csi-cephfsplugin-ds24t                          3/3     Running   0          3m53s   192.168.199.43   k8s-node02   <none>           <none>
csi-cephfsplugin-provisioner-55fb69589f-7kjdq   6/6     Running   0          4m6s    10.244.58.196    k8s-node02   <none>           <none>
csi-cephfsplugin-provisioner-55fb69589f-v5blc   6/6     Running   0          4m6s    10.244.85.203    k8s-node01   <none>           <none>
csi-cephfsplugin-provisioner-55fb69589f-wjw5j   6/6     Running   0          4m6s    10.244.235.196   k8s-master   <none>           <none>
csi-cephfsplugin-q9qmc                          3/3     Running   0          3m53s   192.168.199.42   k8s-node01   <none>           <none>

4. Create the secret

root@k8s-master(192.168.199.41)/root> cd ceph-csi-3.4.0/examples/cephfs/
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> vim secret.yaml

---
apiVersion: v1
kind: Secret
metadata:
  name: csi-cephfs-secret
  namespace: default
stringData:
  # Required for statically provisioned volumes
  userID: cephfs	// the cephfs auth user created above
  userKey: AQDW9k1lDBtfBRAAP7aBGqZMXSXlNeWbmQNAoQ==	// from "ceph auth get-key client.cephfs"; no base64 conversion needed

  # Required for dynamically provisioned volumes
  adminID: admin	// the ceph admin auth user
  adminKey: AQDG0U1lita+ABAAVoqPu5pxe/7dMh2UdXsRKA== // from "ceph auth get-key client.admin"; no base64 conversion needed

root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> kubectl apply -f secret.yaml
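
The two keys above are taken directly from the Ceph keyring. A hedged helper for collecting them, assuming client.cephfs and client.admin both exist (run on a ceph node):

# plain keys, exactly as they go into stringData above (no base64 step)
ceph auth get-key client.cephfs ; echo
ceph auth get-key client.admin ; echo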

Create the StorageClass

root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> vim storageclass.yaml

---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: csi-cephfs-sc
provisioner: cephfs.csi.ceph.com
parameters:
  # (required) String representing a Ceph cluster to provision storage from.
  # Should be unique across all Ceph clusters in use for provisioning,
  # cannot be greater than 36 bytes in length, and should remain immutable for
  # the lifetime of the StorageClass in use.
  # Ensure to create an entry in the configmap named ceph-csi-config, based on
  # csi-config-map-sample.yaml, to accompany the string chosen to
  # represent the Ceph cluster in clusterID below
  clusterID: 212d53e8-9766-44a4-9cfd-d9ca6ae44882	// the fsid, from "ceph mon dump"

  # (required) CephFS filesystem name into which the volume shall be created
  # eg: fsName: myfs
  fsName: cephfs	// from "ceph fs ls"

  # (optional) Ceph pool into which volume data shall be stored
  # pool: <cephfs-data-pool>

  # (optional) Comma separated string of Ceph-fuse mount options.
  # For eg:
  # fuseMountOptions: debug

  # (optional) Comma separated string of Cephfs kernel mount options.
  # Check man mount.ceph for mount options. For eg:
  # kernelMountOptions: readdir_max_bytes=1048576,norbytes

  # The secrets have to contain user and/or Ceph admin credentials.
  csi.storage.k8s.io/provisioner-secret-name: csi-cephfs-secret
  csi.storage.k8s.io/provisioner-secret-namespace: default
  csi.storage.k8s.io/controller-expand-secret-name: csi-cephfs-secret
  csi.storage.k8s.io/controller-expand-secret-namespace: default
  csi.storage.k8s.io/node-stage-secret-name: csi-cephfs-secret
  csi.storage.k8s.io/node-stage-secret-namespace: default

  # (optional) The driver can use either ceph-fuse (fuse) or
  # ceph kernelclient (kernel).
  # If omitted, default volume mounter will be used - this is
  # determined by probing for ceph-fuse and mount.ceph
  # mounter: kernel

  # (optional) Prefix to use for naming subvolumes.
  # If omitted, defaults to "csi-vol-".
  # volumeNamePrefix: "foo-bar-"

reclaimPolicy: Delete
allowVolumeExpansion: true
mountOptions:
  - discard	// changed from the sample's "debug" to "discard"

root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> kubectl apply -f storageclass.yaml
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> kubectl get sc
NAME            PROVISIONER           RECLAIMPOLICY   VOLUMEBINDINGMODE   ALLOWVOLUMEEXPANSION   AGE
csi-cephfs-sc   cephfs.csi.ceph.com   Delete          Immediate           true                   2d15h

Create the PVC

root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> vim pvc.yaml

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: csi-cephfs-pvc
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
  storageClassName: csi-cephfs-sc
  
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> kubectl apply -f pvc.yaml
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> kubectl get pvc,pv
NAME                                   STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS    AGE
persistentvolumeclaim/csi-cephfs-pvc   Bound    pvc-c7023d0d-f100-49ea-8010-b8752a5b46ee   1Gi        RWX            csi-cephfs-sc   9s

NAME                                                        CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                    STORAGECLASS    REASON   AGE
persistentvolume/pvc-c7023d0d-f100-49ea-8010-b8752a5b46ee   1Gi        RWX            Delete           Bound    default/csi-cephfs-pvc   csi-cephfs-sc            8s
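
Optionally, the dynamically provisioned volume can also be seen on the Ceph side, where ceph-csi creates it as a CephFS subvolume (by default in a group named "csi"; nautilus syntax, names will differ per cluster):

# list the subvolumes ceph-csi has provisioned for PVCs
ceph fs subvolume ls cephfs csi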

Create a Pod that mounts the pvc

root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> cat pod.yaml
---
apiVersion: v1
kind: Pod
metadata:
  name: csi-cephfs-demo-pod
spec:
  containers:
    - name: web-server
      image: docker.io/library/nginx:latest
      volumeMounts:
        - name: mypvc
          mountPath: /usr/share/nginx/html	// changed to nginx's document root
  volumes:
    - name: mypvc
      persistentVolumeClaim:
        claimName: csi-cephfs-pvc
        readOnly: false


root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> kubectl apply -f pod.yaml

Test and verify

Mount the cephfs locally, write an index.html, then access it through the Pod.

  1. Mount locally
root@ceph-node01(192.168.199.44)/root> mount.ceph ceph-node01:6789:/ /mnt/cephfs/ -o name=cephfs,secret=AQDW9k1lDBtfBRAAP7aBGqZMXSXlNeWbmQNAoQ==
  2. Write the file
root@ceph-node01(192.168.199.44)/root> cd /mnt/cephfs/
1. The directory generated for the pvc is under this volumes directory
root@ceph-node01(192.168.199.44)/mnt/cephfs> ls
html/  index.html  volumes/

2. Write the file
root@ceph-node01(192.168.199.44)~>echo "$(date) hello cephfs." > /mnt/cephfs/volumes/csi/csi-vol-ff2f1bae-81d1-11ee-86dc-6aa525639d6b/b8209261-dbc2-4277-b6b8-ac98e6786215/index.html

3. Access the Pod's address
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> kubectl get po -o wide
NAME                                            READY   STATUS    RESTARTS   AGE     IP               NODE         NOMINATED NODE   READINESS GATES
csi-cephfsplugin-provisioner-55fb69589f-gb77z   6/6     Running   0          10m     10.244.58.197    k8s-node02   <none>           <none>
csi-cephfsplugin-provisioner-55fb69589f-m768l   6/6     Running   0          10m     10.244.235.197   k8s-master   <none>           <none>
csi-cephfsplugin-provisioner-55fb69589f-v864s   6/6     Running   0          10m     10.244.85.205    k8s-node01   <none>           <none>
csi-cephfsplugin-rjxl4                          3/3     Running   0          10m     192.168.199.43   k8s-node02   <none>           <none>
csi-cephfsplugin-mlb2j                          3/3     Running   0          10m     192.168.199.41   k8s-master   <none>           <none>
csi-cephfsplugin-n967b                          3/3     Running   0          10m     192.168.199.42   k8s-node01   <none>           <none>
csi-cephfs-demo-pod                             1/1     Running   0          2m53s   10.244.85.206    k8s-node01   <none>           <none>
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> curl 10.244.85.206
Mon Nov 13 11:18:21 CST 2023 hello cephfs.
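
Instead of browsing /mnt/cephfs/volumes for the generated directory as above, the subvolume path can also be resolved directly; a hedged sketch (nautilus syntax, <subvolume-name> stands for whatever "ceph fs subvolume ls cephfs csi" returned):

# prints the path inside the cephfs tree that backs the PVC
ceph fs subvolume getpath cephfs <subvolume-name> csi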

Clean up the k8s Pod resources and Ceph

1. Remove the ceph-csi example resources

root@k8s-master(192.168.199.41)/root> cd ceph-csi-3.4.0/examples/cephfs/
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> kubectl delete -f ./

2. Delete the pv

root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> kubectl get pv
NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                    STORAGECLASS    REASON   AGE
pvc-67fecbaf-de59-40c4-9c69-467204eafe5e   1Gi        RWX            Delete           Bound    default/csi-cephfs-pvc   csi-cephfs-sc            146m
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/examples/cephfs> kubectl delete pv pvc-67fecbaf-de59-40c4-9c69-467204eafe5e

3. Delete the csi deployment

root@k8s-master(192.168.199.41)/root> cd ceph-csi-3.4.0/deploy/cephfs/kubernetes/
root@k8s-master(192.168.199.41)/root/ceph-csi-3.4.0/deploy/cephfs/kubernetes> kubectl delete -f ./

4. Clean up the ceph cluster

root@ceph-node01(192.168.199.44)~>cd /etc/ceph/

1. Stop all mds services
root@ceph-node01(192.168.199.44)/root> systemctl stop ceph-mds.target
root@ceph-node01(192.168.199.45)/root> systemctl stop ceph-mds.target
root@ceph-node01(192.168.199.46)/root> systemctl stop ceph-mds.target

2. Delete the cephfs, plus the leftover kubernetes pool from the RBD chapter
root@ceph-node01(192.168.199.44)/etc/ceph>ceph fs rm cephfs --yes-i-really-mean-it
root@ceph-node01(192.168.199.44)/etc/ceph>ceph osd pool rm kubernetes kubernetes --yes-i-really-really-mean-it
pool 'kubernetes' removed

3. Delete the pools
root@ceph-node01(192.168.199.44)/root> ceph osd pool rm cephfs_data cephfs_data   --yes-i-really-really-mean-it
pool 'cephfs_data' removed
root@ceph-node01(192.168.199.44)/root> ceph osd pool rm cephfs_metadata cephfs_metadata   --yes-i-really-really-mean-it
pool 'cephfs_metadata' removed

References

Integrating a Ceph cluster's RBD block storage with a k8s cluster: https://juejin.cn/post/7119298577569939469

Using the ceph-csi plugin's cephfs mode for persistent storage in k8s: https://blog.csdn.net/HYESC/article/details/128039771