Many PGs ended up in an inconsistent state.
Most of them were repaired with ceph pg repair all, but the 3 PGs shown below
remain in the failed_repair state and the repair does not progress any further.
[root@cephvm1 ~]# ceph health detail
HEALTH_ERR 30 scrub errors; Too many repaired reads on 7 OSDs; Possible data damage: 3 pgs inconsistent
OSD_SCRUB_ERRORS 30 scrub errors
OSD_TOO_MANY_REPAIRS Too many repaired reads on 7 OSDs
osd.29 had 315 reads repaired
osd.23 had 530 reads repaired
osd.18 had 69 reads repaired
osd.2 had 267 reads repaired
osd.0 had 179 reads repaired
osd.12 had 513 reads repaired
osd.13 had 404 reads repaired
PG_DAMAGED Possible data damage: 3 pgs inconsistent
pg 2.2f is active+clean+inconsistent+failed_repair, acting [29,13,18]
pg 2.46 is active+clean+inconsistent+failed_repair, acting [12,0,29]
pg 2.5c is active+clean+inconsistent+failed_repair, acting [12,23,0]
The output of the query for pg 2.2f is shown below. The problem seems to be that the
primary and its two peers each report a different version.
[root@cephvm1 ~]# ceph pg 2.2f query
{
"state": "active+clean+inconsistent+failed_repair",
"snap_trimq": "[]",
"snap_trimq_len": 0,
"epoch": 426,
"up": [
29,
13,
18
],
"acting": [
29,
13,
18
],
"acting_recovery_backfill": [
"13",
"18",
"29"
],
"info": {
"pgid": "2.2f",
"last_update": "426'128436680",
"last_complete": "426'128436680",
"log_tail": "390'128433627",
"last_user_version": 128436529,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 111,
"epoch_pool_created": 67,
"last_epoch_started": 426,
"last_interval_started": 425,
"last_epoch_clean": 426,
"last_interval_clean": 425,
"last_epoch_split": 111,
"last_epoch_marked_full": 0,
"same_up_since": 425,
"same_interval_since": 425,
"same_primary_since": 425,
"last_scrub": "426'128436680",
"last_scrub_stamp": "2023-06-21 15:57:53.645395",
"last_deep_scrub": "426'128436680",
"last_deep_scrub_stamp": "2023-06-21 15:57:53.645395",
"last_clean_scrub_stamp": "2023-03-28 09:11:29.298557"
},
"stats": {
"version": "426'128436680",
"reported_seq": "128628939",
"reported_epoch": "426",
"state": "active+clean+inconsistent+failed_repair",
"last_fresh": "2023-06-21 15:57:53.645450",
"last_change": "2023-06-21 15:57:53.645450",
"last_active": "2023-06-21 15:57:53.645450",
"last_peered": "2023-06-21 15:57:53.645450",
"last_clean": "2023-06-21 15:57:53.645450",
"last_became_active": "2023-06-21 14:03:02.233710",
"last_became_peered": "2023-06-21 14:03:02.233710",
"last_unstale": "2023-06-21 15:57:53.645450",
"last_undegraded": "2023-06-21 15:57:53.645450",
"last_fullsized": "2023-06-21 15:57:53.645450",
"mapping_epoch": 425,
"log_start": "390'128433627",
"ondisk_log_start": "390'128433627",
"created": 111,
"last_epoch_clean": 426,
"parent": "0.0",
"parent_split_bits": 7,
"last_scrub": "426'128436680",
"last_scrub_stamp": "2023-06-21 15:57:53.645395",
"last_deep_scrub": "426'128436680",
"last_deep_scrub_stamp": "2023-06-21 15:57:53.645395",
"last_clean_scrub_stamp": "2023-03-28 09:11:29.298557",
"log_size": 3053,
"ondisk_log_size": 3053,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 10888387166,
"num_objects": 2610,
"num_object_clones": 0,
"num_object_copies": 7830,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 2610,
"num_whiteouts": 0,
"num_read": 191976,
"num_read_kb": 10314827,
"num_write": 128429383,
"num_write_kb": 741542291,
"num_scrub_errors": 3,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 3,
"num_objects_recovered": 28,
"num_bytes_recovered": 113242624,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0,
"num_omap_bytes": 0,
"num_omap_keys": 0,
"num_objects_repaired": 28
},
"up": [
29,
13,
18
],
"acting": [
29,
13,
18
],
"avail_no_missing": [],
"object_location_counts": [],
"blocked_by": [],
"up_primary": 29,
"acting_primary": 29,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 426,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
"peer_info": [
{
"peer": "13",
"pgid": "2.2f",
"last_update": "426'128436680",
"last_complete": "426'128436680",
"log_tail": "390'128433627",
"last_user_version": 128436529,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 111,
"epoch_pool_created": 67,
"last_epoch_started": 426,
"last_interval_started": 425,
"last_epoch_clean": 426,
"last_interval_clean": 425,
"last_epoch_split": 111,
"last_epoch_marked_full": 0,
"same_up_since": 425,
"same_interval_since": 425,
"same_primary_since": 425,
"last_scrub": "426'128436680",
"last_scrub_stamp": "2023-06-21 15:57:53.645395",
"last_deep_scrub": "426'128436680",
"last_deep_scrub_stamp": "2023-06-21
15:57:53.645395",
"last_clean_scrub_stamp": "2023-03-28
09:11:29.298557"
},
"stats": {
"version": "406'128436652",
"reported_seq": "128628750",
"reported_epoch": "424",
"state": "peering",
"last_fresh": "2023-06-21 14:03:00.219516",
"last_change": "2023-06-21 14:03:00.219516",
"last_active": "2023-06-12 09:43:51.161310",
"last_peered": "2023-04-17 12:38:42.363058",
"last_clean": "2023-04-17 12:38:42.363058",
"last_became_active": "2023-04-17 10:54:25.756138",
"last_became_peered": "2023-04-17 10:54:25.756138",
"last_unstale": "2023-06-21 14:03:00.219516",
"last_undegraded": "2023-06-21 14:03:00.219516",
"last_fullsized": "2023-06-21 14:03:00.219516",
"mapping_epoch": 425,
"log_start": "390'128433627",
"ondisk_log_start": "390'128433627",
"created": 111,
"last_epoch_clean": 418,
"parent": "0.0",
"parent_split_bits": 7,
"last_scrub": "406'128436652",
"last_scrub_stamp": "2023-06-21 08:35:59.025077",
"last_deep_scrub": "406'128436652",
"last_deep_scrub_stamp": "2023-06-21
08:35:59.025077",
"last_clean_scrub_stamp": "2023-03-28
09:11:29.298557",
"log_size": 3025,
"ondisk_log_size": 3025,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 10888387166,
"num_objects": 2610,
"num_object_clones": 0,
"num_object_copies": 7830,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 2610,
"num_whiteouts": 0,
"num_read": 191976,
"num_read_kb": 10314827,
"num_write": 128429383,
"num_write_kb": 741542291,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0,
"num_omap_bytes": 0,
"num_omap_keys": 0,
"num_objects_repaired": 0
},
"up": [
29,
13,
18
],
"acting": [
29,
13,
18
],
"avail_no_missing": [],
"object_location_counts": [],
"blocked_by": [],
"up_primary": 29,
"acting_primary": 29,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 426,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
{
"peer": "18",
"pgid": "2.2f",
"last_update": "426'128436680",
"last_complete": "426'128436680",
"log_tail": "390'128433627",
"last_user_version": 128436529,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 111,
"epoch_pool_created": 67,
"last_epoch_started": 426,
"last_interval_started": 425,
"last_epoch_clean": 426,
"last_interval_clean": 425,
"last_epoch_split": 111,
"last_epoch_marked_full": 0,
"same_up_since": 425,
"same_interval_since": 425,
"same_primary_since": 425,
"last_scrub": "426'128436680",
"last_scrub_stamp": "2023-06-21 15:57:53.645395",
"last_deep_scrub": "426'128436680",
"last_deep_scrub_stamp": "2023-06-21
15:57:53.645395",
"last_clean_scrub_stamp": "2023-03-28
09:11:29.298557"
},
"stats": {
"version": "406'128436651",
"reported_seq": "128628747",
"reported_epoch": "406",
"state": "active+clean+scrubbing+deep",
"last_fresh": "2023-04-17 12:38:42.363058",
"last_change": "2023-04-17 12:35:34.783904",
"last_active": "2023-04-17 12:38:42.363058",
"last_peered": "2023-04-17 12:38:42.363058",
"last_clean": "2023-04-17 12:38:42.363058",
"last_became_active": "2023-04-17 10:54:25.756138",
"last_became_peered": "2023-04-17 10:54:25.756138",
"last_unstale": "2023-04-17 12:38:42.363058",
"last_undegraded": "2023-04-17 12:38:42.363058",
"last_fullsized": "2023-04-17 12:38:42.363058",
"mapping_epoch": 425,
"log_start": "390'128433627",
"ondisk_log_start": "390'128433627",
"created": 111,
"last_epoch_clean": 406,
"parent": "0.0",
"parent_split_bits": 7,
"last_scrub": "390'128260640",
"last_scrub_stamp": "2023-03-28 09:11:29.298557",
"last_deep_scrub": "390'125680773",
"last_deep_scrub_stamp": "2023-03-24
13:37:20.633751",
"last_clean_scrub_stamp": "2023-03-28
09:11:29.298557",
"log_size": 3024,
"ondisk_log_size": 3024,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 10888387166,
"num_objects": 2610,
"num_object_clones": 0,
"num_object_copies": 7830,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 2610,
"num_whiteouts": 0,
"num_read": 191976,
"num_read_kb": 10314827,
"num_write": 128429383,
"num_write_kb": 741542291,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0,
"num_omap_bytes": 0,
"num_omap_keys": 0,
"num_objects_repaired": 0
},
"up": [
29,
13,
18
],
"acting": [
29,
13,
18
],
"avail_no_missing": [],
"object_location_counts": [],
"blocked_by": [],
"up_primary": 29,
"acting_primary": 29,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 426,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
}
],
"recovery_state": [
{
"name": "Started/Primary/Active",
"enter_time": "2023-06-21 14:03:02.221320",
"might_have_unfound": [],
"recovery_progress": {
"backfill_targets": [],
"waiting_on_backfill": [],
"last_backfill_started": "MIN",
"backfill_info": {
"begin": "MIN",
"end": "MIN",
"objects": []
},
"peer_backfill_info": [],
"backfills_in_flight": [],
"recovering": [],
"pg_backend": {
"pull_from_peer": [],
"pushing": []
}
},
"scrub": {
"scrubber.epoch_start": "425",
"scrubber.active": false,
"scrubber.state": "INACTIVE",
"scrubber.start": "MIN",
"scrubber.end": "MIN",
"scrubber.max_end": "MIN",
"scrubber.subset_last_update": "0'0",
"scrubber.deep": false,
"scrubber.waiting_on_whom": []
}
},
{
"name": "Started",
"enter_time": "2023-06-21 14:03:01.225451"
}
],
"agent_state": {}
}
-------------
"last_user_version": 128436529,
"version": "426'128436680",
"last_user_version": 128436529,
"version": "406'128436652",
"last_user_version": 128436529,
"version": "406'128436651",
Can I repair to a specific version?