Hi all:
I have a Ceph cluster; the version is 12.2.12.
This is my ceph osd tree:
[root@node-1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-25 2.78760 root rack-test
-26 0.92920 rack rack_1
-37 0.92920 host host_1
12 ssd 0.92920 osd.12 up 1.00000 1.00000
-27 0.92920 rack rack_2
-38 0.92920 host host_2
6 ssd 0.92920 osd.6 up 1.00000 1.00000
-28 0.92920 rack rack_3
-39 0.92920 host host_3
18 ssd 0.92920 osd.18 up 1.00000 1.00000
I have a pool in the cluster:
pool 14 'gyt-test' replicated size 2 min_size 1 crush_rule 2 object_hash rjenkins
pg_num 128 pgp_num 128 last_change 5864 lfor 0/5828 flags hashpspool stripe_width 0
removed_snaps [1~3]
Here is the dump of crush_rule 2:
{
"rule_id": 2,
"rule_name": "replicated_rule_rack",
"ruleset": 2,
"type": 1,
"min_size": 1,
"max_size": 10,
"steps": [
{
"op": "take",
"item": -25,
"item_name": "rack-test"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "rack"
},
{
"op": "emit"
}
]
}
I have an rbd image in the gyt-test pool:
[root@node-1 ~]# rbd ls gyt-test
gyt-test
Now I use the fio tool to test this rbd image:
[root@node-1 ~]# fio --ioengine=rbd --pool=gyt-test --rbdname=gyt-test --rw=randwrite
--bs=4k --numjobs=1 --runtime=120 --iodepth=128 --clientname=admin --direct=1 --name=test
--time_based=1 --eta-newline 1
test: (g=0): rw=randwrite, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B,
ioengine=rbd, iodepth=128
fio-3.1
Starting 1 process
Jobs: 1 (f=1): [w(1)][2.5%][r=0KiB/s,w=42.2MiB/s][r=0,w=10.8k IOPS][eta 01m:57s]
Jobs: 1 (f=1): [w(1)][4.2%][r=0KiB/s,w=57.7MiB/s][r=0,w=14.8k IOPS][eta 01m:55s]
Jobs: 1 (f=1): [w(1)][5.8%][r=0KiB/s,w=52.4MiB/s][r=0,w=13.4k IOPS][eta 01m:53s]
Jobs: 1 (f=1): [w(1)][7.5%][r=0KiB/s,w=61.1MiB/s][r=0,w=15.6k IOPS][eta 01m:51s]
Jobs: 1 (f=1): [w(1)][9.2%][r=0KiB/s,w=30.0MiB/s][r=0,w=7927 IOPS][eta 01m:49s]
Jobs: 1 (f=1): [w(1)][10.8%][r=0KiB/s,w=59.1MiB/s][r=0,w=15.1k IOPS][eta 01m:47s]
Jobs: 1 (f=1): [w(1)][12.5%][r=0KiB/s,w=51.6MiB/s][r=0,w=13.2k IOPS][eta 01m:45s]
Jobs: 1 (f=1): [w(1)][14.2%][r=0KiB/s,w=58.3MiB/s][r=0,w=14.9k IOPS][eta 01m:43s]
Jobs: 1 (f=1): [w(1)][15.8%][r=0KiB/s,w=56.1MiB/s][r=0,w=14.4k IOPS][eta 01m:41s]
Jobs: 1 (f=1): [w(1)][17.5%][r=0KiB/s,w=44.8MiB/s][r=0,w=11.5k IOPS][eta 01m:39s]
This is normal.
Then I move the host_1 bucket directly under the rack-test root:
[root@node-1 ~]# ceph osd crush move host_1 root=rack-test
moved item id -37 name 'host_1' to location {root=gyt-test} in crush map
[root@node-1 ~]# ceph osd tree
-25 2.78760 root rack-test
-37 0.92920 host host_1
12 ssd 0.92920 osd.12 up 1.00000 1.00000
-26 0 rack rack_1
-27 0.92920 rack rack_2
-38 0.92920 host host_2
6 ssd 0.92920 osd.6 up 1.00000 1.00000
-28 0.92920 rack rack_3
-39 0.92920 host host_3
18 ssd 0.92920 osd.18 up 1.00000 1.00000
Then I use the fio tool to test the gyt-test rbd image again:
[root@node-1 ~]# fio --ioengine=rbd --pool=gyt-test --rbdname=gyt-test --rw=randwrite
--bs=4k --numjobs=1 --runtime=120 --iodepth=64 --clientname=admin --direct=1 --name=test
--time_based=1 --eta-newline 1
test: (g=0): rw=randwrite, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B,
ioengine=rbd, iodepth=64
fio-3.1
Starting 1 process
Jobs: 1 (f=1): [w(1)][2.5%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:57s]
Jobs: 1 (f=1): [w(1)][4.2%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:55s]
Jobs: 1 (f=1): [w(1)][5.8%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:53s]
Jobs: 1 (f=1): [w(1)][7.5%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:51s]
Jobs: 1 (f=1): [w(1)][9.2%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:49s]
Jobs: 1 (f=1): [w(1)][10.8%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:47s]
Jobs: 1 (f=1): [w(1)][12.5%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:45s]
Jobs: 1 (f=1): [w(1)][14.2%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:43s]
Jobs: 1 (f=1): [w(1)][15.8%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:41s]
Jobs: 1 (f=1): [w(1)][17.5%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:39s]
Jobs: 1 (f=1): [w(1)][19.2%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:37s]
Jobs: 1 (f=1): [w(1)][20.8%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:35s]
Jobs: 1 (f=1): [w(1)][22.5%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:33s]
Jobs: 1 (f=1): [w(1)][24.2%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:31s]
Jobs: 1 (f=1): [w(1)][25.8%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:29s]
Jobs: 1 (f=1): [w(1)][27.5%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:27s]
Jobs: 1 (f=1): [w(1)][29.2%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:25s]
Jobs: 1 (f=1): [w(1)][30.8%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:23s]
Jobs: 1 (f=1): [w(1)][32.5%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:21s]
Jobs: 1 (f=1): [w(1)][34.2%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:19s]
Jobs: 1 (f=1): [w(1)][35.8%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:17s]
Jobs: 1 (f=1): [w(1)][37.5%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:15s]
Jobs: 1 (f=1): [w(1)][39.2%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:13s]
Jobs: 1 (f=1): [w(1)][40.8%][r=0KiB/s,w=0KiB/s][r=0,w=0 IOPS][eta 01m:11s]
The rbd image gyt-test cannot handle any IO.
The ceph status at this time:
[root@node-1 ~]# ceph -s
cluster:
id: 896eeeed-6f14-4fb0-8d0d-00adda67a36c
health: HEALTH_WARN
8782/299948 objects misplaced (2.928%)
services:
mon: 3 daemons, quorum node-1,node-2,node-3
mgr: node-1(active)
osd: 18 osds: 18 up, 18 in; 75 remapped pgs
flags nodeep-scrub
rbd-mirror: 1 daemon active
rgw: 1 daemon active
data:
pools: 14 pools, 1024 pgs
objects: 104.25k objects, 371GiB
usage: 641GiB used, 11.5TiB / 12.1TiB avail
pgs: 8782/299948 objects misplaced (2.928%)
949 active+clean
75 active+clean+remapped
io:
client: 106KiB/s rd, 567KiB/s wr, 127op/s rd, 52op/s wr
All of my PGs are active+clean, and the remapped PGs have already finished recovery, yet the IO is still blocked — this is not normal.
[root@node-1 ~]# ceph pg dump | grep ^14 | grep "\[12"
dumped all
14.37 106 0 0 0 0 441110528 1733 1733
active+clean 2020-09-29 00:52:21.808536 6226'33265 6278:34441 [12,18]
12 [12,18] 12 0'0 2020-09-28 11:35:49.973497
0'0 2020-09-28 11:31:12.912584 0
14.1c 93 0 0 0 0 383131648 1512 1512
active+clean 2020-09-29 00:45:18.475265 6226'28692 6278:29883 [12,6]
12 [12,6] 12 0'0 2020-09-28 11:34:55.543085
0'0 2020-09-28 11:31:12.912584 0
14.10 82 0 0 0 0 340963328 1505 1505
active+clean 2020-09-29 00:45:18.474759 6226'26031 6278:27257 [12,6]
12 [12,6] 12 0'0 2020-09-28 11:34:37.400091
0'0 2020-09-28 11:31:12.912584 0
14.2c 85 0 0 0 0 353435648 1677 1677
active+clean 2020-09-29 00:51:24.528368 6226'25262 6278:26344 [12,18]
12 [12,18] 12 0'0 2020-09-28 11:35:28.806418
0'0 2020-09-28 11:31:12.912584 0
14.26 106 0 0 0 0 440807424 1534 1534
active+clean 2020-09-29 00:45:18.474631 6226'33003 6278:34306 [12,6]
12 [12,6] 12 0'0 2020-09-28 11:35:01.590900
0'0 2020-09-28 11:31:12.912584 0
14.6a 78 0 0 0 0 323850240 1671 1671
active+clean 2020-09-29 00:50:57.336812 6226'23954 6278:25009 [12,18]
12 [12,18] 12 0'0 2020-09-28 11:36:06.101612
0'0 2020-09-28 11:31:12.912584 0
The primary OSD of these PGs is still osd.12, which I cannot understand: after moving host_1, osd.12 should no longer be selectable by the gyt-test pool's crush rule (it is no longer under any rack bucket). Is this a bug?