Ceph (3): Mounting Ceph RBD block storage and resizing it dynamically

Published 2023-09-24 15:43:22 · Author: areke

1. Mounting and using RBD from a client with a regular account

RBD (RADOS Block Devices) is Ceph's block storage service. RBD provides a high-performance, virtually unlimited scalable storage backend for virtualization platforms such as KVM and VMware and for cloud platforms such as OpenStack and Kubernetes. Through the librbd library (or the kernel rbd module) a client can use the RADOS cluster as a block device. However, a pool intended for RBD must have the rbd application enabled and be initialized before use.

1.1 Create the RBD pool

Create a pool named rbd-data1, enable the rbd application on it, and initialize it.

# Create the pool, specifying the number of PGs and PGPs; PGP controls how the data in the PGs is grouped and placed, and pgp_num is normally set equal to pg_num
cephadmin@ceph-deploy:/data/ceph-cluster$ ceph osd pool create rbd-data1 32 32 
pool 'rbd-data1' created

# List the pools
cephadmin@ceph-deploy:/data/ceph-cluster$ ceph osd pool ls
device_health_metrics
mypool
myrbd1
rbd-data1

# Enable the rbd application on the pool
cephadmin@ceph-deploy:/data/ceph-cluster$ ceph osd pool application enable rbd-data1 rbd
enabled application 'rbd' on pool 'rbd-data1'

# Initialize the pool with the rbd command
cephadmin@ceph-deploy:/data/ceph-cluster$ rbd pool init -p rbd-data1
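
The pg_num/pgp_num values chosen above can be checked and adjusted later with the standard pool commands; a minimal sketch (the value 64 is only an example, and with the PG autoscaler enabled manual changes are usually unnecessary):

# Check the current PG settings of the pool
ceph osd pool get rbd-data1 pg_num
ceph osd pool get rbd-data1 pgp_num
# Raise them later if the pool grows
ceph osd pool set rbd-data1 pg_num 64
ceph osd pool set rbd-data1 pgp_num 64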

1.2 Create and verify images

An RBD pool cannot be used as a block device directly; images must first be created in it as needed, and those images are then used as the block devices. The rbd command creates, lists, and deletes images, and also handles management operations such as cloning images, creating snapshots, rolling an image back to a snapshot, and listing snapshots.
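
The snapshot and clone operations mentioned above are not demonstrated in this article; a minimal sketch of the corresponding rbd subcommands (the snapshot name snap1 and clone name data-img1-clone are only examples):

# Create and list a snapshot of an image
rbd snap create rbd-data1/data-img1@snap1
rbd snap ls rbd-data1/data-img1
# Roll the image back to the snapshot (the image should not be in use)
rbd snap rollback rbd-data1/data-img1@snap1
# Cloning requires a protected snapshot
rbd snap protect rbd-data1/data-img1@snap1
rbd clone rbd-data1/data-img1@snap1 rbd-data1/data-img1-clone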

# Create two images
cephadmin@ceph-deploy:/data/ceph-cluster$ rbd create data-img1 --size 3G --pool rbd-data1    
cephadmin@ceph-deploy:/data/ceph-cluster$ rbd create data-img2 --size 5G --pool rbd-data1

# List the images
cephadmin@ceph-deploy:/data/ceph-cluster$ rbd ls --pool rbd-data1
data-img1
data-img2
# List the images with more detail
cephadmin@ceph-deploy:/data/ceph-cluster$ rbd ls --pool rbd-data1 -l
NAME       SIZE   PARENT  FMT  PROT  LOCK
data-img1  3 GiB            2        
data-img2  5 GiB            2        


# Show detailed information for an image
cephadmin@ceph-deploy:/data/ceph-cluster$ rbd --image data-img2 --pool rbd-data1 info
rbd image 'data-img2':
	size 5 GiB in 1280 objects
	order 22 (4 MiB objects)
	snapshot_count: 0
	id: d69362489058
	block_name_prefix: rbd_data.d69362489058
	format: 2
	features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
	op_features: 
	flags: 
	create_timestamp: Sun Sep 24 02:38:54 2023
	access_timestamp: Sun Sep 24 02:38:54 2023
	modify_timestamp: Sun Sep 24 02:38:54 2023

# Show image information in JSON format
cephadmin@ceph-deploy:/data/ceph-cluster$ rbd ls --pool rbd-data1 -l --format json --pretty-format
[
    {
        "image": "data-img1",
        "id": "d67e97824a24",
        "size": 3221225472,
        "format": 2
    },
    {
        "image": "data-img2",
        "id": "d69362489058",
        "size": 5368709120,
        "format": 2
    }
]

1.3 Create a regular user and grant permissions

# Create a regular user
cephadmin@ceph-deploy:/data/ceph-cluster$ ceph auth get-or-create client.tom mon 'allow r' osd 'allow rwx pool=rbd-data1'
[client.tom]
	key = AQCWMw9lHyZEORAA9brfXF6K0WWMLyz9a/yhPw==

# Verify the user
cephadmin@ceph-deploy:/data/ceph-cluster$ ceph auth get client.tom
[client.tom]
	key = AQCWMw9lHyZEORAA9brfXF6K0WWMLyz9a/yhPw==
	caps mon = "allow r"
	caps osd = "allow rwx pool=rbd-data1"
exported keyring for client.tom

# Create a keyring file
cephadmin@ceph-deploy:/data/ceph-cluster$ ceph-authtool --create-keyring ceph.client.tom.keyring
creating ceph.client.tom.keyring

# Export the user into the keyring file
cephadmin@ceph-deploy:/data/ceph-cluster$ ceph auth get client.tom -o ceph.client.tom.keyring 
exported keyring for client.tom

# Verify the user's keyring file
cephadmin@ceph-deploy:/data/ceph-cluster$ cat ceph.client.tom.keyring 
[client.tom]
	key = AQCWMw9lHyZEORAA9brfXF6K0WWMLyz9a/yhPw==
	caps mon = "allow r"
	caps osd = "allow rwx pool=rbd-data1"
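
If the user's permissions need to be changed later, ceph auth caps replaces the existing caps outright, so the full set must be restated; a sketch (the second pool rbd-data2 is only an example):

ceph auth caps client.tom mon 'allow r' osd 'allow rwx pool=rbd-data1, allow rwx pool=rbd-data2'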

1.4 Install the Ceph client

Install ceph-common on the Ubuntu client:

apt update
apt install ceph-common -y

1.5 Copy the regular user's authentication files to the client

Send the cluster configuration file and the keyring file from the deploy node:

cephadmin@ceph-deploy:/data/ceph-cluster$ scp ceph.conf ceph.client.tom.keyring root@10.0.0.65:/etc/ceph/
root@10.0.0.65's password: 
ceph.conf                                                                                                                                                                                                                                    100%  298   553.4KB/s   00:00  
ceph.client.tom.keyring

1.6 Verify client permissions

[root@ceph-client ~]#ls /etc/ceph/
ceph.client.tom.keyring  ceph.conf  rbdmap

[root@ceph-client ~]#ceph --user tom -s			# without --user the admin account is used by default: ceph -s
  cluster:
    id:     28820ae5-8747-4c53-827b-219361781ada
    health: HEALTH_OK
 
  services:
    mon: 3 daemons, quorum ceph-mon1,ceph-mon2,ceph-mon3 (age 3h)
    mgr: ceph-mgr2(active, since 3h), standbys: ceph-mgr1
    osd: 20 osds: 20 up (since 82m), 20 in (since 2d)
 
  data:
    pools:   4 pools, 97 pgs
    objects: 11 objects, 650 B
    usage:   5.8 GiB used, 20 TiB / 20 TiB avail
    pgs:     97 active+clean
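
The --user tom shortcut works because the keyring follows the default naming /etc/ceph/ceph.client.tom.keyring; the configuration and keyring can also be passed explicitly, a sketch:

ceph -s --conf /etc/ceph/ceph.conf --name client.tom --keyring /etc/ceph/ceph.client.tom.keyring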

1.7 Map the RBD image

Map the image as the regular user.

# Map the RBD image
[root@ceph-client ~]#rbd --user tom -p rbd-data1 map data-img2
/dev/rbd0

# Verify the mapped device
[root@ceph-client ~]#fdisk -l /dev/rbd0
Disk /dev/rbd0: 5 GiB, 5368709120 bytes, 10485760 sectors
Units: sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 65536 bytes / 65536 bytes

[root@ceph-client ~]#lsblk
NAME   MAJ:MIN RM  SIZE RO TYPE MOUNTPOINT
loop0    7:0    0 91.9M  1 loop /snap/lxd/24061
loop2    7:2    0   62M  1 loop /snap/core20/1611
loop3    7:3    0   47M  1 loop 
loop4    7:4    0   62M  1 loop /snap/core20/1593
loop5    7:5    0 67.8M  1 loop /snap/lxd/22753
loop6    7:6    0 40.9M  1 loop /snap/snapd/20092
sda      8:0    0   20G  0 disk 
├─sda1   8:1    0    1M  0 part 
├─sda2   8:2    0    1G  0 part /boot
├─sda3   8:3    0    2G  0 part [SWAP]
└─sda4   8:4    0   17G  0 part /
sr0     11:0    1 1024M  0 rom  
rbd0   252:0    0    5G  0 disk 
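
Here the map succeeded with the image's default feature set (layering, exclusive-lock, object-map, fast-diff, deep-flatten). On older client kernels the map can fail with an "image uses unsupported features" error; in that case the unsupported features can be disabled on the image first, a sketch:

# Disable features not supported by the client kernel (example feature list)
rbd feature disable rbd-data1/data-img2 object-map fast-diff deep-flatten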

1.8 Format the device and mount it

# Format the device with XFS
[root@ceph-client ~]#mkfs.xfs /dev/rbd0
meta-data=/dev/rbd0              isize=512    agcount=8, agsize=163840 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=1, sparse=1, rmapbt=0
         =                       reflink=1
data     =                       bsize=4096   blocks=1310720, imaxpct=25
         =                       sunit=16     swidth=16 blks
naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
log      =internal log           bsize=4096   blocks=2560, version=2
         =                       sectsz=512   sunit=16 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0

# Create the /data directory and mount rbd0 on it
[root@ceph-client ~]#mkdir /data
[root@ceph-client ~]#mount /dev/rbd0 /data/
# Data can be written to /data as usual
[root@ceph-client ~]#cp /var/log/syslog /data/
[root@ceph-client ~]#ls /data/
syslog
[root@ceph-client ~]#df -TH
Filesystem     Type      Size  Used Avail Use% Mounted on
udev           devtmpfs  450M     0  450M   0% /dev
tmpfs          tmpfs     100M  1.5M   98M   2% /run
/dev/sda4      xfs        19G  5.0G   14G  28% /
tmpfs          tmpfs     496M     0  496M   0% /dev/shm
tmpfs          tmpfs     5.3M     0  5.3M   0% /run/lock
tmpfs          tmpfs     496M     0  496M   0% /sys/fs/cgroup
/dev/loop2     squashfs   66M   66M     0 100% /snap/core20/1611
/dev/loop5     squashfs   72M   72M     0 100% /snap/lxd/22753
/dev/loop4     squashfs   66M   66M     0 100% /snap/core20/1593
/dev/sda2      xfs       1.1G  363M  701M  35% /boot
tmpfs          tmpfs     100M     0  100M   0% /run/user/0
/dev/loop6     squashfs   43M   43M     0 100% /snap/snapd/20092
/dev/loop0     squashfs   97M   97M     0 100% /snap/lxd/24061
/dev/rbd0      xfs       5.4G   72M  5.3G   2% /data

Verify the image status from the admin node:

cephadmin@ceph-deploy:/data/ceph-cluster$ rbd ls -p rbd-data1 -l
NAME       SIZE   PARENT  FMT  PROT  LOCK
data-img1  3 GiB            2        
data-img2  5 GiB            2        excl			# exclusive lock held: the image is mapped by a client
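
The excl flag means a client currently holds the exclusive lock. Which client is attached can be checked with rbd status, a sketch:

# List the watchers (mapped clients) of the image
rbd status rbd-data1/data-img2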

1.9 Verify that the Ceph kernel module is loaded

After an RBD image is mapped, the kernel automatically loads the libceph.ko module.

[root@ceph-client ~]#lsmod |grep ceph
libceph               327680  1 rbd
libcrc32c              16384  4 btrfs,xfs,raid456,libceph


[root@ceph-client ~]#modinfo libceph
filename:       /lib/modules/5.4.0-124-generic/kernel/net/ceph/libceph.ko
license:        GPL
description:    Ceph core library
author:         Patience Warnick <patience@newdream.net>
author:         Yehuda Sadeh <yehuda@hq.newdream.net>
author:         Sage Weil <sage@newdream.net>
srcversion:     915EC0D99CBE44982F02F3B
depends:        libcrc32c
retpoline:      Y
intree:         Y
name:           libceph
vermagic:       5.4.0-124-generic SMP mod_unload modversions 
sig_id:         PKCS#7
signer:         Build time autogenerated kernel key
sig_key:        5B:BD:39:34:A2:A7:C7:5E:12:68:B3:72:FD:3E:01:8A:98:76:4D:32
sig_hashalgo:   sha512

1.10 Mount automatically at boot

# Put the map and mount commands in rc.local
[root@ceph-client /]#cat /etc/rc.local 
#!/bin/bash
/usr/bin/rbd --user tom -p rbd-data1 map data-img2
mount /dev/rbd0 /data/

[root@ceph-client ~]#chmod a+x /etc/rc.local

Reboot and verify:

[root@ceph-client ~]#rbd showmapped 
id  pool       namespace  image      snap  device   
0   rbd-data1             data-img2  -     /dev/rbd0


[root@ceph-client ~]#df -TH
Filesystem     Type      Size  Used Avail Use% Mounted on
udev           devtmpfs  443M     0  443M   0% /dev
tmpfs          tmpfs      98M  1.4M   97M   2% /run
/dev/sda4      xfs        19G  5.7G   13G  32% /
tmpfs          tmpfs     490M     0  490M   0% /dev/shm
tmpfs          tmpfs     5.3M     0  5.3M   0% /run/lock
...
/dev/rbd0      xfs        11G  110M   5G   2% /data
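
An alternative to rc.local is the rbdmap helper shipped with ceph-common: list the image in /etc/ceph/rbdmap, add a noauto entry to /etc/fstab for the udev-created /dev/rbd/<pool>/<image> device, and enable the rbdmap service. A sketch based on the rbdmap(8) workflow; paths and unit name may differ by version:

# /etc/ceph/rbdmap: one image per line, with the cephx user and keyring to use
rbd-data1/data-img2    id=tom,keyring=/etc/ceph/ceph.client.tom.keyring

# /etc/fstab: noauto so boot does not hang if the cluster is unreachable
/dev/rbd/rbd-data1/data-img2    /data    xfs    noauto    0 0

# Map and mount the listed images at boot
systemctl enable rbdmap.service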

1.11 Unmap the RBD image

umount /data
rbd --user tom -p rbd-data1 unmap data-img2
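
If the unmap fails because the device is still held open, check what is mapped and, after making sure nothing really needs the device, force the unmap; a sketch:

rbd showmapped
rbd unmap -o force /dev/rbd0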

1.12 Delete an RBD image

Deleting an image also deletes its data, and the data cannot be recovered, so be careful when performing delete operations.

# Delete the data-img1 image from pool rbd-data1
cephadmin@ceph-deploy:/data/ceph-cluster$ rbd rm --pool rbd-data1 --image data-img1
Removing image: 100% complete...done.

Verify the remaining images:

cephadmin@ceph-deploy:/data/ceph-cluster$ rbd ls -p rbd-data1 -l
NAME       SIZE    PARENT  FMT  PROT  LOCK
data-img2  10 GiB            2          
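
If a safety net is preferred over an immediate, unrecoverable rbd rm, an image can be moved to the RBD trash and restored or purged later; a minimal sketch with placeholder image names:

# Move an image to the trash instead of deleting it outright
rbd trash mv rbd-data1/<image-name>
rbd trash ls rbd-data1
# Restore it using the id printed by "rbd trash ls", or purge the trash for good
rbd trash restore rbd-data1/<image-id>
rbd trash purge rbd-data1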

2. Reclaiming RBD storage space

Data deleted in the filesystem is only marked as deleted; the space is not immediately released back to the block storage.

2.1 Cluster status

[root@ceph-client ~]#ceph --user tom df
--- RAW STORAGE ---
CLASS    SIZE   AVAIL     USED  RAW USED  %RAW USED
hdd    20 TiB  20 TiB  5.9 GiB   5.9 GiB       0.03
TOTAL  20 TiB  20 TiB  5.9 GiB   5.9 GiB       0.03
 
--- POOLS ---
POOL                   ID  PGS  STORED  OBJECTS    USED  %USED  MAX AVAIL
device_health_metrics   1    1     0 B        0     0 B      0    6.3 TiB
mypool                  2   32     0 B        0     0 B      0    6.3 TiB
myrbd1                  3   32    19 B        3  12 KiB      0    6.3 TiB
rbd-data1               4   32  11 MiB       19  33 MiB      0    6.3 TiB

2.2 Create data

# Create a 200 MB file
[root@ceph-client ~]#dd if=/dev/zero of=/data/ceph-test-file bs=1M count=200
200+0 records in
200+0 records out
209715200 bytes (210 MB, 200 MiB) copied, 1.09549 s, 191 MB/s

2.3 Verify in Ceph

[root@ceph-client ~]#ceph --user tom df
--- RAW STORAGE ---
CLASS    SIZE   AVAIL     USED  RAW USED  %RAW USED
hdd    20 TiB  20 TiB  6.5 GiB   6.5 GiB       0.03
TOTAL  20 TiB  20 TiB  6.5 GiB   6.5 GiB       0.03
 
--- POOLS ---
POOL                   ID  PGS   STORED  OBJECTS     USED  %USED  MAX AVAIL
device_health_metrics   1    1      0 B        0      0 B      0    6.3 TiB
mypool                  2   32      0 B        0      0 B      0    6.3 TiB
myrbd1                  3   32     19 B        3   12 KiB      0    6.3 TiB
rbd-data1               4   32  211 MiB       69  633 MiB      0    6.3 TiB		# about 200 MiB now stored

2.4 Delete the data

[root@ceph-client ~]#rm -rf /data/ceph-test-file

Deleted data is only marked as deleted and is not immediately freed from the block storage, so after the deletion ceph df still shows the space as used.

[root@ceph-client ~]#df -Th
Filesystem     Type      Size  Used Avail Use% Mounted on
udev           devtmpfs  429M     0  429M   0% /dev
tmpfs          tmpfs      95M  1.4M   94M   2% /run
/dev/sda4      xfs        17G  4.7G   13G  28% /
tmpfs          tmpfs     473M     0  473M   0% /dev/shm
tmpfs          tmpfs     5.0M     0  5.0M   0% /run/lock
tmpfs          tmpfs     473M     0  473M   0% /sys/fs/cgroup
/dev/loop2     squashfs   62M   62M     0 100% /snap/core20/1611
/dev/loop5     squashfs   68M   68M     0 100% /snap/lxd/22753
/dev/loop4     squashfs   62M   62M     0 100% /snap/core20/1593
/dev/sda2      xfs      1014M  347M  668M  35% /boot
tmpfs          tmpfs      95M     0   95M   0% /run/user/0
/dev/loop6     squashfs   41M   41M     0 100% /snap/snapd/20092
/dev/loop0     squashfs   92M   92M     0 100% /snap/lxd/24061
/dev/rbd0      xfs       5.0G   69M  5.0G   2% /data					# the client shows the data as deleted


[root@ceph-client ~]#ceph --user tom df
--- RAW STORAGE ---
CLASS    SIZE   AVAIL     USED  RAW USED  %RAW USED
hdd    20 TiB  20 TiB  6.5 GiB   6.5 GiB       0.03
TOTAL  20 TiB  20 TiB  6.5 GiB   6.5 GiB       0.03
 
--- POOLS ---
POOL                   ID  PGS   STORED  OBJECTS     USED  %USED  MAX AVAIL
device_health_metrics   1    1      0 B        0      0 B      0    6.3 TiB
mypool                  2   32      0 B        0      0 B      0    6.3 TiB
myrbd1                  3   32     19 B        3   12 KiB      0    6.3 TiB
rbd-data1               4   32  211 MiB       69  633 MiB      0    6.3 TiB		# still shows ~200 MiB; the space has not been reclaimed

2.5 Reclaim space immediately with fstrim

The fstrim command (short for "filesystem trim") discards blocks that are no longer used by the filesystem. Usage: fstrim -v /data, where /data is the mount point.

[root@ceph-client ~]#fstrim -v /data
/data: 5 GiB (5357244416 bytes) trimmed

[root@ceph-client ~]#ceph --user tom df
--- RAW STORAGE ---
CLASS    SIZE   AVAIL     USED  RAW USED  %RAW USED
hdd    20 TiB  20 TiB  5.9 GiB   5.9 GiB       0.03
TOTAL  20 TiB  20 TiB  5.9 GiB   5.9 GiB       0.03
 
--- POOLS ---
POOL                   ID  PGS  STORED  OBJECTS    USED  %USED  MAX AVAIL
device_health_metrics   1    1     0 B        0     0 B      0    6.3 TiB
mypool                  2   32     0 B        0     0 B      0    6.3 TiB
myrbd1                  3   32    19 B        3  12 KiB      0    6.3 TiB
rbd-data1               4   32  11 MiB       68  32 MiB      0    6.3 TiB		# the space has been reclaimed
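
Instead of running fstrim by hand, the discard can be automated, either with the periodic fstrim.timer provided by util-linux or with the discard mount option (online discard adds some overhead); a sketch:

# Periodic trim of all supported mounted filesystems
systemctl enable --now fstrim.timer

# Or mount the RBD-backed filesystem with online discard
mount -o discard /dev/rbd0 /data/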

3. Dynamically resizing RBD image space

3.1 Expand

cephadmin@ceph-deploy:/data/ceph-cluster$ rbd ls -p rbd-data1 -l
NAME       SIZE   PARENT  FMT  PROT  LOCK
data-img1  3 GiB            2        
data-img2  5 GiB            2        excl

# Resize the data-img2 image to 10G
cephadmin@ceph-deploy:/data/ceph-cluster$ rbd resize --pool rbd-data1 --image data-img2 --size 10G
Resizing image: 100% complete...done.

cephadmin@ceph-deploy:/data/ceph-cluster$ rbd ls -p rbd-data1 -l
NAME       SIZE    PARENT  FMT  PROT  LOCK
data-img1   3 GiB            2        
data-img2  10 GiB            2  				# expanded to 10 GiB

3.2 Shrink

Shrinking is generally not recommended: the filesystem on the image is not shrunk along with it, and any data beyond the new size is lost.

cephadmin@ceph-deploy:/data/ceph-cluster$ rbd resize --pool rbd-data1 --image data-img1 --size 2G --allow-shrink
Resizing image: 100% complete...done.
cephadmin@ceph-deploy:/data/ceph-cluster$ rbd  ls -p rbd-data1 -l
NAME       SIZE    PARENT  FMT  PROT  LOCK
data-img1   2 GiB            2            			# shrunk to 2 GiB
data-img2  10 GiB            2        excl

3.3 Verify the image size on the client

Verify on the client.

# fdisk already sees 10 GiB
[root@ceph-client ~]#fdisk -l /dev/rbd0
Disk /dev/rbd0: 10 GiB, 10737418240 bytes, 20971520 sectors
Units: sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 65536 bytes / 65536 bytes

# The filesystem has not picked up the new size yet
[root@ceph-client ~]#df -h
Filesystem      Size  Used Avail Use% Mounted on
udev            429M     0  429M   0% /dev
tmpfs            95M  1.4M   94M   2% /run
...
/dev/rbd0       5.0G   69M  5.0G   2% /data			# not grown automatically

3.4 Grow the filesystem manually on the client

  • For ext{2,3,4} filesystems, use the resize2fs command to update:
resize2fs /dev/rbd0
  • For xfs filesystems, use xfs_growfs:
xfs_growfs /dev/rbd0

Grow the mounted filesystem manually:

# Run xfs_growfs
[root@ceph-client ~]#xfs_growfs /dev/rbd0
meta-data=/dev/rbd0              isize=512    agcount=8, agsize=163840 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=1, sparse=1, rmapbt=0
         =                       reflink=1
data     =                       bsize=4096   blocks=1310720, imaxpct=25
         =                       sunit=16     swidth=16 blks
naming   =version 2              bsize=4096   ascii-ci=0, ftype=1
log      =internal log           bsize=4096   blocks=2560, version=2
         =                       sectsz=512   sunit=16 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0
data blocks changed from 1310720 to 2621440


[root@ceph-client ~]#df -Th
Filesystem     Type      Size  Used Avail Use% Mounted on
udev           devtmpfs  429M     0  429M   0% /dev
tmpfs          tmpfs      95M  1.4M   94M   2% /run
/dev/sda4      xfs        17G  4.7G   13G  28% /
...
/dev/rbd0      xfs        10G  105M  9.9G   2% /data		# now recognized as 10G