On Sun, May 2, 2021 at 11:15 PM Magnus Harlander <magnus(a)harlan.de> wrote:
Hi,
I know there is a thread about problems with mounting cephfs with 5.11 kernels.
I tried everything that's mentioned there, but I still can not mount a cephfs
from an octopus node.
I verified:
- I can not mount with 5.11 client kernels (fedora 33 and ubuntu 21.04)
- I can mount with 5.10 client kernels
- It is not due to ipv4/ipv6. I'm not using ipv6
- I'm using a cluster network on a private network segment. Because this was
mentioned as a possible cause for the problems (next to ipv6)
I removed the cluster network and now I'm using the same network for osd syncs and
client connections. It did not help.
- mount returns with a timeout and error after about 1 minute
- I tried the ms_mode=legacy (and others) mount options. Nothing helped
- I tried to use IP:PORT:/fs to mount to exclude DNS as the cause. Didn't help.
- I did set up a similar test cluster on a few VMs and did not have a problem with
mounting.
Even used cluster networks, which also worked fine.
I'm running out of ideas. Any help would be appreciated.
\Magnus
My Setup:
SERVER OS:
==========
[root@s1 ~]# hostnamectl
Static hostname: s1.harlan.de
Icon name: computer-desktop
Chassis: desktop
Machine ID: 3a0a6308630842ffad6b9bb8be4c7547
Boot ID: ffb2948d3934419dafceb0990316d9fd
Operating System: CentOS Linux 8
CPE OS Name: cpe:/o:centos:centos:8
Kernel: Linux 4.18.0-240.22.1.el8_3.x86_64
Architecture: x86-64
CEPH VERSION:
=============
ceph version 15.2.11 (e3523634d9c2227df9af89a4eac33d16738c49cb) octopus (stable)
CLIENT OS:
==========
[root@islay ~]# hostnamectl
Static hostname: islay
Icon name: computer-laptop
Chassis: laptop
Machine ID: 6de7b27dfd864e9ea52b8b0cff47cdfc
Boot ID: 6d8d8bb36f274458b2b761b0a046c8ad
Operating System: Fedora 33 (Workstation Edition)
CPE OS Name: cpe:/o:fedoraproject:fedora:33
Kernel: Linux 5.11.16-200.fc33.x86_64
Architecture: x86-64
CEPH VERSION:
=============
[root@islay harlan]# ceph version
ceph version 15.2.11 (e3523634d9c2227df9af89a4eac33d16738c49cb) octopus (stable)
[root@s1 ~]# ceph version
ceph version 15.2.11 (e3523634d9c2227df9af89a4eac33d16738c49cb) octopus (stable)
FSTAB ENTRY:
============
cfs0,cfs1:/fs /data/fs ceph rw,_netdev,name=admin,secretfile=/etc/ceph/fs.secret
0 0
IP CONFIG MON/OSD NODE (s1)
=======================
[root@s1 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default
qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: enp4s0: <BROADCAST,MULTICAST,SLAVE,UP,LOWER_UP> mtu 1500 qdisc fq_codel master
bond0 state UP group default qlen 1000
link/ether 98:de:d0:04:26:86 brd ff:ff:ff:ff:ff:ff
3: enp5s0: <BROADCAST,MULTICAST,SLAVE,UP,LOWER_UP> mtu 1500 qdisc fq_codel master
bond0 state UP group default qlen 1000
link/ether a8:a1:59:18:e7:ea brd ff:ff:ff:ff:ff:ff
4: vmbr: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group
default qlen 1000
link/ether 98:de:d0:04:26:86 brd ff:ff:ff:ff:ff:ff
inet 192.168.200.111/24 brd 192.168.200.255 scope global noprefixroute vmbr
valid_lft forever preferred_lft forever
inet 192.168.200.141/24 brd 192.168.200.255 scope global secondary noprefixroute
vmbr
valid_lft forever preferred_lft forever
inet 192.168.200.101/24 brd 192.168.200.255 scope global secondary vmbr
valid_lft forever preferred_lft forever
inet6 fe80::be55:705d:7c9e:eaa4/64 scope link noprefixroute
valid_lft forever preferred_lft forever
5: bond0: <BROADCAST,MULTICAST,MASTER,UP,LOWER_UP> mtu 1500 qdisc noqueue master
vmbr state UP group default qlen 1000
link/ether 98:de:d0:04:26:86 brd ff:ff:ff:ff:ff:ff
6: virbr0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc noqueue state DOWN
group default qlen 1000
link/ether 52:54:00:32:ea:2f brd ff:ff:ff:ff:ff:ff
inet 192.168.122.1/24 brd 192.168.122.255 scope global virbr0
valid_lft forever preferred_lft forever
7: virbr0-nic: <BROADCAST,MULTICAST> mtu 1500 qdisc fq_codel master virbr0 state
DOWN group default qlen 1000
link/ether 52:54:00:32:ea:2f brd ff:ff:ff:ff:ff:ff
8: vnet0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel master vmbr
state UNKNOWN group default qlen 1000
link/ether fe:54:00:67:4d:15 brd ff:ff:ff:ff:ff:ff
inet6 fe80::fc54:ff:fe67:4d15/64 scope link
valid_lft forever preferred_lft forever
CEPH STATUS:
============
[root@s1 ~]# ceph -s
cluster:
id: 86bbd6c5-ae96-4c78-8a5e-50623f0ae524
health: HEALTH_OK
services:
mon: 4 daemons, quorum s0,mbox,s1,r1 (age 6h)
mgr: s1(active, since 6h), standbys: s0
mds: fs:1 {0=s1=up:active} 1 up:standby
osd: 10 osds: 10 up (since 6h), 10 in (since 6h)
data:
pools: 6 pools, 289 pgs
objects: 1.75M objects, 1.6 TiB
usage: 3.3 TiB used, 13 TiB / 16 TiB avail
pgs: 289 active+clean
io:
client: 0 B/s rd, 245 KiB/s wr, 0 op/s rd, 4 op/s wr
CEPH OSD TREE:
==============
[root@s1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 16.99994 root default
-9 8.39996 host s0
1 hdd 4.00000 osd.1 up 1.00000 1.00000
5 hdd 1.79999 osd.5 up 1.00000 1.00000
9 hdd 1.79999 osd.9 up 1.00000 1.00000
3 ssd 0.50000 osd.3 up 1.00000 1.00000
4 ssd 0.29999 osd.4 up 1.00000 1.00000
-12 8.59998 host s1
6 hdd 1.79999 osd.6 up 1.00000 1.00000
7 hdd 1.79999 osd.7 up 1.00000 1.00000
8 hdd 4.00000 osd.8 up 1.00000 1.00000
0 ssd 0.50000 osd.0 up 1.00000 1.00000
2 ssd 0.50000 osd.2 up 1.00000 1.00000
CEPH MON STAT:
==============
[root@s1 ~]# ceph mon stat
e19: 4 mons at
{mbox=[v2:192.168.200.5:3300/0,v1:192.168.200.5:6789/0],r1=[v2:192.168.200.113:3300/0,v1:192.168.200.113:6789/0],s0=[v2:192.168.200.110:3300/0,v1:192.168.200.110:6789/0],s1=[v2:192.168.200.111:3300/0,v1:192.168.200.111:6789/0]},
election epoch 8618, leader 0 s0, quorum 0,1,2,3 s0,mbox,s1,r1
CEPH FS DUMP:
=============
[root@s1 ~]# ceph fs dump
dumped fsmap epoch 15534
e15534
enable_multiple, ever_enabled_multiple: 0,0
compat: compat={},rocompat={},incompat={1=base v0.20,2=client writeable ranges,3=default
file layouts on dirs,4=dir inode in separate object,5=mds uses versioned
encoding,6=dirfrag is stored in omap,8=no anchor table,9=file layout v2,10=snaprealm v2}
legacy client fscid: 2
Filesystem 'fs' (2)
fs_name fs
epoch 15534
flags 12
created 2021-02-02T18:47:25.306744+0100
modified 2021-05-02T16:33:36.738341+0200
tableserver 0
root 0
session_timeout 60
session_autoclose 300
max_file_size 1099511627776
min_compat_client 0 (unknown)
last_failure 0
last_failure_osd_epoch 64252
compat compat={},rocompat={},incompat={1=base v0.20,2=client writeable
ranges,3=default file layouts on dirs,4=dir inode in separate object,5=mds uses versioned
encoding,6=dirfrag is stored in omap,8=no anchor table,9=file layout v2,10=snaprealm v2}
max_mds 1
in 0
up {0=54782953}
failed
damaged
stopped
data_pools [10]
metadata_pool 11
inline_data disabled
balancer
standby_count_wanted 1
[mds.s1{0:54782953} state up:active seq 816 addr
[v2:192.168.200.111:6800/1895356761,v1:192.168.200.111:6801/1895356761]]
Standby daemons:
[mds.s0{-1:54958514} state up:standby seq 1 addr
[v2:192.168.200.110:6800/297471268,v1:192.168.200.110:6801/297471268]]
CEPH CONF:
==========
[root@s1 ~]# cat /etc/ceph/ceph.conf
[global]
fsid = 86bbd6c5-ae96-4c78-8a5e-50623f0ae524
mon_initial_members = s0, s1, mbox, r1
mon_host = 192.168.200.110,192.168.200.111,192.168.200.5,192.168.200.113
ms_bind_ipv4 = true
ms_bind_ipv6 = false
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
public network = 192.168.200.0/24
[osd]
public network = 192.168.200.0/24
osd_memory_target = 2147483648
osd crush update on start = false
[osd.1]
public addr = 192.168.200.140
osd_memory_target = 2147483648
[osd.3]
public addr = 192.168.200.140
osd_memory_target = 2147483648
[osd.4]
public addr = 192.168.200.140
osd_memory_target = 2147483648
[osd.5]
public addr = 192.168.200.140
osd_memory_target = 2147483648
[osd.9]
public addr = 192.168.200.140
osd_memory_target = 2147483648
[osd.0]
public addr = 192.168.200.141
osd_memory_target = 2147483648
[osd.2]
public addr = 192.168.200.141
osd_memory_target = 2147483648
[osd.6]
public addr = 192.168.200.141
osd_memory_target = 2147483648
[osd.7]
public addr = 192.168.200.141
osd_memory_target = 2147483648
[osd.8]
public addr = 192.168.200.141
osd_memory_target = 2147483648
CEPH FS STAT
============
[root@s1 ~]# ceph fs status
fs - 0 clients
==
RANK STATE MDS ACTIVITY DNS INOS
0 active s1 Reqs: 0 /s 0 0
POOL TYPE USED AVAIL
cfs_md metadata 2365M 528G
cfs data 2960G 4967G
STANDBY MDS
s0
VERSION
DAEMONS
None s1
ceph version 15.2.11 (e3523634d9c2227df9af89a4eac33d16738c49cb) octopus (stable) s0
CLIENT JOURNALCTL WHEN MOUNTING
===============================
May 02 22:54:04 islay kernel: FS-Cache: Loaded
May 02 22:54:05 islay kernel: Key type ceph registered
May 02 22:54:05 islay kernel: libceph: loaded (mon/osd proto 15/24)
May 02 22:54:05 islay kernel: FS-Cache: Netfs 'ceph' registered for caching
May 02 22:54:05 islay kernel: ceph: loaded (mds proto 32)
May 02 22:54:05 islay kernel: libceph: mon1 (1)192.168.200.111:6789 session established
May 02 22:54:05 islay kernel: libceph: mon1 (1)192.168.200.111:6789 socket closed (con
state OPEN)
May 02 22:54:05 islay kernel: libceph: mon1 (1)192.168.200.111:6789 session lost, hunting
for new mon
May 02 22:54:05 islay kernel: libceph: mon0 (1)192.168.200.5:6789 session established
May 02 22:54:05 islay kernel: libceph: no match of type 1 in addrvec
May 02 22:54:05 islay kernel: libceph: corrupt full osdmap (-2) epoch 64281 off 3154
(00000000a90fe1d7 of 000000000083f4bd-00000000c03bdc9b)
May 02 22:54:05 islay kernel: osdmap: 00000000: 08 07 4f 24 00 00 09 01 9e 12 00 00 86 bb
d6 c5 ..O$............
May 02 22:54:05 islay kernel: osdmap: 00000010: ae 96 4c 78 8a 5e 50 62 3f 0a e5 24 19 fb
00 00 ..Lx.^Pb?..$....
May 02 22:54:05 islay kernel: osdmap: 00000020: 54 f0 53 5d 3a fd ae 0e 1b 07 8f 60 b3 8e
d2 2f T.S]:......`.../
May 02 22:54:05 islay kernel: osdmap: 00000030: 06 00 00 00 02 00 00 00 00 00 00 00 1d 05
44 01 ..............D.
May 02 22:54:05 islay kernel: osdmap: 00000040: 00 00 01 02 02 02 20 00 00 00 20 00 00 00
00 00 ...... ... .....
May 02 22:54:05 islay kernel: osdmap: 00000050: 00 00 00 00 00 00 5e fa 00 00 2e 04 00 00
00 00 ......^.........
May 02 22:54:05 islay kernel: osdmap: 00000060: 00 00 5e fa 00 00 00 00 00 00 00 00 00 00
00 00 ..^.............
..... many more lines; I can provide them if they are useful.
CEPH OSDMAP:
============
epoch 64281
fsid 86bbd6c5-ae96-4c78-8a5e-50623f0ae524
created 2019-08-14T13:28:20.246349+0200
modified 2021-05-02T22:10:03.802328+0200
flags sortbitwise,recovery_deletes,purged_snapdirs,pglog_hardlimit
crush_version 140
full_ratio 0.92
backfillfull_ratio 0.9
nearfull_ratio 0.88
require_min_compat_client jewel
min_compat_client jewel
require_osd_release octopus
pool 2 'vms' replicated size 2 min_size 1 crush_rule 2 object_hash rjenkins
pg_num 32 pgp_num 32 autoscale_mode on last_change 64094 lfor 0/62074/62072 flags
hashpspool,selfmanaged_snaps stripe_width 0 application rbd
pool 8 'ssdpool' replicated size 2 min_size 1 crush_rule 2 object_hash rjenkins
pg_num 32 pgp_num 32 autoscale_mode on last_change 61436 lfor 0/61436/61434 flags
hashpspool stripe_width 0
pool 9 'hddpool' replicated size 2 min_size 1 crush_rule 1 object_hash rjenkins
pg_num 32 pgp_num 32 autoscale_mode on last_change 61413 lfor 0/61413/61411 flags
hashpspool stripe_width 0
pool 10 'cfs' replicated size 2 min_size 1 crush_rule 1 object_hash rjenkins
pg_num 128 pgp_num 128 autoscale_mode on last_change 63328 flags
hashpspool,selfmanaged_snaps stripe_width 0 application cephfs
pool 11 'cfs_md' replicated size 2 min_size 1 crush_rule 2 object_hash rjenkins
pg_num 64 pgp_num 64 autoscale_mode on last_change 63332 flags hashpspool stripe_width 0
application cephfs
pool 12 'device_health_metrics' replicated size 2 min_size 1 crush_rule 0
object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode off last_change 64255 flags
hashpspool stripe_width 0 application mgr_devicehealth
max_osd 12
osd.0 up in weight 1 up_from 64236 up_thru 64263 down_at 64233 last_clean_interval
[64211,64231) [v2:192.168.200.141:6804/3027,v1:192.168.200.141:6805/3027]
[v2:192.168.200.111:6806/3027,v1:192.168.200.111:6807/3027] exists,up
631bc170-45fd-4948-9a5e-4c278569c0bc
osd.1 up in weight 1 up_from 64259 up_thru 64260 down_at 64249 last_clean_interval
[64223,64248) [v2:192.168.200.140:6811/3066,v1:192.168.200.140:6813/3066]
[v2:192.168.200.110:6813/3066,v1:192.168.200.110:6815/3066] exists,up
660a762c-001d-4160-a9ee-d0acd078e776
osd.2 up in weight 1 up_from 64236 up_thru 64266 down_at 64233 last_clean_interval
[64211,64231) [v2:192.168.200.141:6815/3008,v1:192.168.200.141:6816/3008]
[v2:192.168.200.111:6816/3008,v1:192.168.200.111:6817/3008] exists,up
e4d94d3a-ec58-46a1-b61c-c47dd39012ed
osd.3 up in weight 1 up_from 64256 up_thru 64264 down_at 64249 last_clean_interval
[64221,64248) [v2:192.168.200.140:6800/3067,v1:192.168.200.140:6801/3067]
[v2:192.168.200.110:6802/3067,v1:192.168.200.110:6803/3067] exists,up
26d25060-fd99-4d15-a1b2-ebb77646671e
osd.4 up in weight 1 up_from 64256 up_thru 64264 down_at 64249 last_clean_interval
[64221,64248) [v2:192.168.200.140:6804/3049,v1:192.168.200.140:6806/3049]
[v2:192.168.200.110:6806/3049,v1:192.168.200.110:6807/3049] exists,up
238f197d-ecbc-4588-8a99-6a63c9bb1a17
osd.5 up in weight 1 up_from 64260 up_thru 64260 down_at 64249 last_clean_interval
[64226,64248) [v2:192.168.200.140:6816/3073,v1:192.168.200.140:6817/3073]
[v2:192.168.200.110:6818/3073,v1:192.168.200.110:6819/3073] exists,up
a9dcb26f-0f1c-4067-a26b-a29939285e0b
osd.6 up in weight 1 up_from 64240 up_thru 64260 down_at 64233 last_clean_interval
[64218,64231) [v2:192.168.200.141:6808/3020,v1:192.168.200.141:6809/3020]
[v2:192.168.200.111:6810/3020,v1:192.168.200.111:6811/3020] exists,up
f399b47d-063f-4b2f-bd93-289377dc9945
osd.7 up in weight 1 up_from 64238 up_thru 64260 down_at 64233 last_clean_interval
[64214,64231) [v2:192.168.200.141:6800/3023,v1:192.168.200.141:6801/3023]
[v2:192.168.200.111:6802/3023,v1:192.168.200.111:6803/3023] exists,up
3557ceca-7bd8-401e-abd3-59bee168e8f6
osd.8 up in weight 1 up_from 64242 up_thru 64260 down_at 64233 last_clean_interval
[64216,64231) [v2:192.168.200.141:6812/3017,v1:192.168.200.141:6813/3017]
[v2:192.168.200.111:6814/3017,v1:192.168.200.111:6815/3017] exists,up
7f9cad3f-163d-4bb7-85b2-fffd46982fff
osd.9 up in weight 1 up_from 64257 up_thru 64257 down_at 64249 last_clean_interval
[64229,64248) [v2:192.168.200.140:6805/3053,v1:192.168.200.140:6807/3053]
[v2:192.168.200.110:6808/3053,v1:192.168.200.110:6809/3053] exists,up
c543b12a-f9bf-4b83-af16-f6b8a3926e69
blacklist 192.168.200.110:0/3803039218 expires 2021-05-03T15:33:52.837358+0200
blacklist 192.168.200.111:6800/3725740504 expires 2021-05-03T15:37:38.953040+0200
blacklist 192.168.200.110:6822/3464419 expires 2021-05-03T15:56:28.124585+0200
blacklist 192.168.200.110:6801/838484672 expires 2021-05-03T15:56:13.108594+0200
blacklist 192.168.200.110:6800/838484672 expires 2021-05-03T15:56:13.108594+0200
blacklist 192.168.200.111:6841/159804987 expires 2021-05-03T14:54:05.413130+0200
blacklist 192.168.200.111:6840/159804987 expires 2021-05-03T14:54:05.413130+0200
blacklist 192.168.200.111:6801/3725740504 expires 2021-05-03T15:37:38.953040+0200
blacklist 192.168.200.110:6807/453197 expires 2021-05-03T15:33:52.837358+0200
blacklist 192.168.200.5:6801/3078236863 expires 2021-05-03T14:38:57.694004+0200
blacklist 192.168.200.110:0/1948864559 expires 2021-05-03T15:33:52.837358+0200
blacklist 192.168.200.111:6800/3987205903 expires 2021-05-03T15:32:12.633802+0200
blacklist 192.168.200.111:6800/2342337613 expires 2021-05-03T14:46:57.936272+0200
blacklist 192.168.200.110:0/3020995128 expires 2021-05-03T15:56:28.124585+0200
blacklist 192.168.200.5:6800/3078236863 expires 2021-05-03T14:38:57.694004+0200
blacklist 192.168.200.110:0/2607867017 expires 2021-05-03T15:33:52.837358+0200
blacklist 192.168.200.111:6801/3987205903 expires 2021-05-03T15:32:12.633802+0200
blacklist 192.168.200.110:0/3159222459 expires 2021-05-03T15:56:28.124585+0200
blacklist 192.168.200.110:6806/453197 expires 2021-05-03T15:33:52.837358+0200
blacklist 192.168.200.110:6823/3464419 expires 2021-05-03T15:56:28.124585+0200
blacklist 192.168.200.111:6801/2342337613 expires 2021-05-03T14:46:57.936272+0200
blacklist 192.168.200.111:6800/2205788037 expires 2021-05-03T14:56:56.448631+0200
blacklist 192.168.200.111:6801/2205788037 expires 2021-05-03T14:56:56.448631+0200
Hi Magnus,
What is the output of "ceph config dump"?
Instead of providing those lines, can you run "ceph osd getmap 64281 -o
osdmap.64281" and attach osdmap.64281 file?
Thanks,
Ilya