Hi,
I've got a Ceph cluster: 7 nodes, 168 OSDs, with 96G of RAM on each server.
Ceph has been configured with a memory target of 3G until we increase RAM
to 128G per node. Available memory tends to hover around 14G. I do see a
tiny bit (a few hundred kB) of swap utilization per ceph-osd process, but
there's no obvious reason for it, so I'm unsure what that's about:
root@ceph02:~# cat /proc/14363/status |egrep 'Name|VmSwap'
Name:   ceph-osd
VmSwap: 464 kB
We're seeing repeated inconsistent PG warnings, generally on the order of
3-10 per week.
pg 2.b9 is active+clean+inconsistent, acting [25,117,128,95,151,15]
PG query on that PG:
INFO:cephadm:Using recent ceph image docker.io/ceph/ceph:v15
{
"snap_trimq": "[]",
"snap_trimq_len": 0,
"state": "active+clean+inconsistent",
"epoch": 20278,
"up": [
25,
117,
128,
95,
151,
15
],
"acting": [
25,
117,
128,
95,
151,
15
],
"acting_recovery_backfill": [
"15(5)",
"25(0)",
"95(3)",
"117(1)",
"128(2)",
"151(4)"
],
"info": {
"pgid": "2.b9s0",
"last_update": "20278'445510",
"last_complete": "20278'445510",
"log_tail": "20278'438137",
"last_user_version": 445510,
"last_backfill": "MAX",
"purged_snaps": [],
"history": {
"epoch_created": 573,
"epoch_pool_created": 100,
"last_epoch_started": 14679,
"last_interval_started": 14678,
"last_epoch_clean": 14716,
"last_interval_clean": 14678,
"last_epoch_split": 573,
"last_epoch_marked_full": 0,
"same_up_since": 14678,
"same_interval_since": 14678,
"same_primary_since": 14396,
"last_scrub": "20278'444009",
"last_scrub_stamp": "2020-09-08T16:57:22.430246+0000",
"last_deep_scrub": "20278'444009",
"last_deep_scrub_stamp":
"2020-09-08T16:57:22.430246+0000",
"last_clean_scrub_stamp":
"2020-09-07T06:34:26.320796+0000",
"prior_readable_until_ub": 0
},
"stats": {
"version": "20278'445510",
"reported_seq": "896803",
"reported_epoch": "20278",
"state": "active+clean+inconsistent",
"last_fresh": "2020-09-08T18:06:45.463880+0000",
"last_change": "2020-09-08T16:57:22.430293+0000",
"last_active": "2020-09-08T18:06:45.463880+0000",
"last_peered": "2020-09-08T18:06:45.463880+0000",
"last_clean": "2020-09-08T18:06:45.463880+0000",
"last_became_active": "2020-08-06T19:35:02.634999+0000",
"last_became_peered": "2020-08-06T19:35:02.634999+0000",
"last_unstale": "2020-09-08T18:06:45.463880+0000",
"last_undegraded": "2020-09-08T18:06:45.463880+0000",
"last_fullsized": "2020-09-08T18:06:45.463880+0000",
"mapping_epoch": 14678,
"log_start": "20278'438137",
"ondisk_log_start": "20278'438137",
"created": 573,
"last_epoch_clean": 14716,
"parent": "0.0",
"parent_split_bits": 10,
"last_scrub": "20278'444009",
"last_scrub_stamp": "2020-09-08T16:57:22.430246+0000",
"last_deep_scrub": "20278'444009",
"last_deep_scrub_stamp":
"2020-09-08T16:57:22.430246+0000",
"last_clean_scrub_stamp":
"2020-09-07T06:34:26.320796+0000",
"log_size": 7373,
"ondisk_log_size": 7373,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 322985947136,
"num_objects": 78724,
"num_object_clones": 0,
"num_object_copies": 472344,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 78724,
"num_whiteouts": 0,
"num_read": 430713,
"num_read_kb": 121695928,
"num_write": 445501,
"num_write_kb": 405283436,
"num_scrub_errors": 1,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 1,
"num_objects_recovered": 21,
"num_bytes_recovered": 88080384,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0,
"num_omap_bytes": 0,
"num_omap_keys": 0,
"num_objects_repaired": 0
},
"up": [
25,
117,
128,
95,
151,
15
],
"acting": [
25,
117,
128,
95,
151,
15
],
"avail_no_missing": [],
"object_location_counts": [],
"blocked_by": [],
"up_primary": 25,
"acting_primary": 25,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 14679,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
"peer_info": [
{
"peer": "15(5)",
"pgid": "2.b9s5",
"last_update": "20278'445510",
"last_complete": "18934'278187",
"log_tail": "14173'104284",
"last_user_version": 111692,
"last_backfill": "MAX",
"purged_snaps": [],
"history": {
"epoch_created": 573,
"epoch_pool_created": 100,
"last_epoch_started": 14679,
"last_interval_started": 14678,
"last_epoch_clean": 14716,
"last_interval_clean": 14678,
"last_epoch_split": 573,
"last_epoch_marked_full": 0,
"same_up_since": 14678,
"same_interval_since": 14678,
"same_primary_since": 14396,
"last_scrub": "20278'444009",
"last_scrub_stamp":
"2020-09-08T16:57:22.430246+0000",
"last_deep_scrub": "20278'444009",
"last_deep_scrub_stamp":
"2020-09-08T16:57:22.430246+0000",
"last_clean_scrub_stamp":
"2020-09-07T06:34:26.320796+0000",
"prior_readable_until_ub": 0
},
"stats": {
"version": "14674'111692",
"reported_seq": "127502",
"reported_epoch": "14674",
"state": "active+undersized+degraded",
"last_fresh": "2020-08-06T19:34:47.288930+0000",
"last_change": "2020-08-06T19:31:58.919146+0000",
"last_active": "2020-08-06T19:34:47.288930+0000",
"last_peered": "2020-08-06T19:34:47.288930+0000",
"last_clean": "2020-08-06T19:31:36.499168+0000",
"last_became_active":
"2020-08-06T19:31:58.919146+0000",
"last_became_peered":
"2020-08-06T19:31:58.919146+0000",
"last_unstale": "2020-08-06T19:34:47.288930+0000",
"last_undegraded": "2020-08-06T19:31:58.906847+0000",
"last_fullsized": "2020-08-06T19:31:58.906728+0000",
"mapping_epoch": 14678,
"log_start": "14173'104284",
"ondisk_log_start": "14173'104284",
"created": 573,
"last_epoch_clean": 14624,
"parent": "0.0",
"parent_split_bits": 10,
"last_scrub": "14341'106257",
"last_scrub_stamp":
"2020-08-06T00:08:25.447555+0000",
"last_deep_scrub": "14005'91363",
"last_deep_scrub_stamp":
"2020-08-04T13:36:30.857877+0000",
"last_clean_scrub_stamp":
"2020-08-06T00:08:25.447555+0000",
"log_size": 7408,
"ondisk_log_size": 7408,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 83721076736,
"num_objects": 19967,
"num_object_clones": 0,
"num_object_copies": 119802,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 19967,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 19967,
"num_whiteouts": 0,
"num_read": 938,
"num_read_kb": 288244,
"num_write": 111692,
"num_write_kb": 84295064,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 17,
"num_bytes_recovered": 71303168,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0,
"num_omap_bytes": 0,
"num_omap_keys": 0,
"num_objects_repaired": 0
},
"up": [
25,
117,
128,
95,
151,
15
],
"acting": [
25,
117,
128,
95,
151,
15
],
"avail_no_missing": [
"25(0)",
"15(5)",
"95(3)",
"117(1)",
"128(2)"
],
"object_location_counts": [
{
"shards": "15(5),25(0),95(3),117(1),128(2)",
"objects": 19967
}
],
"blocked_by": [],
"up_primary": 25,
"acting_primary": 25,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 14679,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
{
"peer": "95(3)",
"pgid": "2.b9s3",
"last_update": "20278'445510",
"last_complete": "18934'278187",
"log_tail": "14173'104284",
"last_user_version": 111692,
"last_backfill": "MAX",
"purged_snaps": [],
"history": {
"epoch_created": 573,
"epoch_pool_created": 100,
"last_epoch_started": 14679,
"last_interval_started": 14678,
"last_epoch_clean": 14716,
"last_interval_clean": 14678,
"last_epoch_split": 573,
"last_epoch_marked_full": 0,
"same_up_since": 14678,
"same_interval_since": 14678,
"same_primary_since": 14396,
"last_scrub": "20278'444009",
"last_scrub_stamp":
"2020-09-08T16:57:22.430246+0000",
"last_deep_scrub": "20278'444009",
"last_deep_scrub_stamp":
"2020-09-08T16:57:22.430246+0000",
"last_clean_scrub_stamp":
"2020-09-07T06:34:26.320796+0000",
"prior_readable_until_ub": 0
},
"stats": {
"version": "14674'111692",
"reported_seq": "127502",
"reported_epoch": "14674",
"state": "active+undersized+degraded",
"last_fresh": "2020-08-06T19:34:47.288930+0000",
"last_change": "2020-08-06T19:31:58.919146+0000",
"last_active": "2020-08-06T19:34:47.288930+0000",
"last_peered": "2020-08-06T19:34:47.288930+0000",
"last_clean": "2020-08-06T19:31:36.499168+0000",
"last_became_active":
"2020-08-06T19:31:58.919146+0000",
"last_became_peered":
"2020-08-06T19:31:58.919146+0000",
"last_unstale": "2020-08-06T19:34:47.288930+0000",
"last_undegraded": "2020-08-06T19:31:58.906847+0000",
"last_fullsized": "2020-08-06T19:31:58.906728+0000",
"mapping_epoch": 14678,
"log_start": "14173'104284",
"ondisk_log_start": "14173'104284",
"created": 573,
"last_epoch_clean": 14624,
"parent": "0.0",
"parent_split_bits": 10,
"last_scrub": "14341'106257",
"last_scrub_stamp":
"2020-08-06T00:08:25.447555+0000",
"last_deep_scrub": "14005'91363",
"last_deep_scrub_stamp":
"2020-08-04T13:36:30.857877+0000",
"last_clean_scrub_stamp":
"2020-08-06T00:08:25.447555+0000",
"log_size": 7408,
"ondisk_log_size": 7408,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 83721076736,
"num_objects": 19967,
"num_object_clones": 0,
"num_object_copies": 119802,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 19967,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 19967,
"num_whiteouts": 0,
"num_read": 938,
"num_read_kb": 288244,
"num_write": 111692,
"num_write_kb": 84295064,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 17,
"num_bytes_recovered": 71303168,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0,
"num_omap_bytes": 0,
"num_omap_keys": 0,
"num_objects_repaired": 0
},
"up": [
25,
117,
128,
95,
151,
15
],
"acting": [
25,
117,
128,
95,
151,
15
],
"avail_no_missing": [
"25(0)",
"15(5)",
"95(3)",
"117(1)",
"128(2)"
],
"object_location_counts": [
{
"shards": "15(5),25(0),95(3),117(1),128(2)",
"objects": 19967
}
],
"blocked_by": [],
"up_primary": 25,
"acting_primary": 25,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 14679,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
{
"peer": "117(1)",
"pgid": "2.b9s1",
"last_update": "20278'445510",
"last_complete": "18934'278187",
"log_tail": "14173'104284",
"last_user_version": 111692,
"last_backfill": "MAX",
"purged_snaps": [],
"history": {
"epoch_created": 573,
"epoch_pool_created": 100,
"last_epoch_started": 14679,
"last_interval_started": 14678,
"last_epoch_clean": 14716,
"last_interval_clean": 14678,
"last_epoch_split": 573,
"last_epoch_marked_full": 0,
"same_up_since": 14678,
"same_interval_since": 14678,
"same_primary_since": 14396,
"last_scrub": "20278'444009",
"last_scrub_stamp":
"2020-09-08T16:57:22.430246+0000",
"last_deep_scrub": "20278'444009",
"last_deep_scrub_stamp":
"2020-09-08T16:57:22.430246+0000",
"last_clean_scrub_stamp":
"2020-09-07T06:34:26.320796+0000",
"prior_readable_until_ub": 0
},
"stats": {
"version": "14674'111692",
"reported_seq": "127502",
"reported_epoch": "14674",
"state": "active+undersized+degraded",
"last_fresh": "2020-08-06T19:34:47.288930+0000",
"last_change": "2020-08-06T19:31:58.919146+0000",
"last_active": "2020-08-06T19:34:47.288930+0000",
"last_peered": "2020-08-06T19:34:47.288930+0000",
"last_clean": "2020-08-06T19:31:36.499168+0000",
"last_became_active":
"2020-08-06T19:31:58.919146+0000",
"last_became_peered":
"2020-08-06T19:31:58.919146+0000",
"last_unstale": "2020-08-06T19:34:47.288930+0000",
"last_undegraded": "2020-08-06T19:31:58.906847+0000",
"last_fullsized": "2020-08-06T19:31:58.906728+0000",
"mapping_epoch": 14678,
"log_start": "14173'104284",
"ondisk_log_start": "14173'104284",
"created": 573,
"last_epoch_clean": 14624,
"parent": "0.0",
"parent_split_bits": 10,
"last_scrub": "14341'106257",
"last_scrub_stamp":
"2020-08-06T00:08:25.447555+0000",
"last_deep_scrub": "14005'91363",
"last_deep_scrub_stamp":
"2020-08-04T13:36:30.857877+0000",
"last_clean_scrub_stamp":
"2020-08-06T00:08:25.447555+0000",
"log_size": 7408,
"ondisk_log_size": 7408,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 83721076736,
"num_objects": 19967,
"num_object_clones": 0,
"num_object_copies": 119802,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 19967,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 19967,
"num_whiteouts": 0,
"num_read": 938,
"num_read_kb": 288244,
"num_write": 111692,
"num_write_kb": 84295064,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 17,
"num_bytes_recovered": 71303168,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0,
"num_omap_bytes": 0,
"num_omap_keys": 0,
"num_objects_repaired": 0
},
"up": [
25,
117,
128,
95,
151,
15
],
"acting": [
25,
117,
128,
95,
151,
15
],
"avail_no_missing": [
"25(0)",
"15(5)",
"95(3)",
"117(1)",
"128(2)"
],
"object_location_counts": [
{
"shards": "15(5),25(0),95(3),117(1),128(2)",
"objects": 19967
}
],
"blocked_by": [],
"up_primary": 25,
"acting_primary": 25,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 14679,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
{
"peer": "128(2)",
"pgid": "2.b9s2",
"last_update": "20278'445510",
"last_complete": "18934'278187",
"log_tail": "14173'104284",
"last_user_version": 111692,
"last_backfill": "MAX",
"purged_snaps": [],
"history": {
"epoch_created": 573,
"epoch_pool_created": 100,
"last_epoch_started": 14679,
"last_interval_started": 14678,
"last_epoch_clean": 14716,
"last_interval_clean": 14678,
"last_epoch_split": 573,
"last_epoch_marked_full": 0,
"same_up_since": 14678,
"same_interval_since": 14678,
"same_primary_since": 14396,
"last_scrub": "20278'444009",
"last_scrub_stamp":
"2020-09-08T16:57:22.430246+0000",
"last_deep_scrub": "20278'444009",
"last_deep_scrub_stamp":
"2020-09-08T16:57:22.430246+0000",
"last_clean_scrub_stamp":
"2020-09-07T06:34:26.320796+0000",
"prior_readable_until_ub": 0
},
"stats": {
"version": "14674'111692",
"reported_seq": "127502",
"reported_epoch": "14674",
"state": "active+undersized+degraded",
"last_fresh": "2020-08-06T19:34:47.288930+0000",
"last_change": "2020-08-06T19:31:58.919146+0000",
"last_active": "2020-08-06T19:34:47.288930+0000",
"last_peered": "2020-08-06T19:34:47.288930+0000",
"last_clean": "2020-08-06T19:31:36.499168+0000",
"last_became_active":
"2020-08-06T19:31:58.919146+0000",
"last_became_peered":
"2020-08-06T19:31:58.919146+0000",
"last_unstale": "2020-08-06T19:34:47.288930+0000",
"last_undegraded": "2020-08-06T19:31:58.906847+0000",
"last_fullsized": "2020-08-06T19:31:58.906728+0000",
"mapping_epoch": 14678,
"log_start": "14173'104284",
"ondisk_log_start": "14173'104284",
"created": 573,
"last_epoch_clean": 14624,
"parent": "0.0",
"parent_split_bits": 10,
"last_scrub": "14341'106257",
"last_scrub_stamp":
"2020-08-06T00:08:25.447555+0000",
"last_deep_scrub": "14005'91363",
"last_deep_scrub_stamp":
"2020-08-04T13:36:30.857877+0000",
"last_clean_scrub_stamp":
"2020-08-06T00:08:25.447555+0000",
"log_size": 7408,
"ondisk_log_size": 7408,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 83721076736,
"num_objects": 19967,
"num_object_clones": 0,
"num_object_copies": 119802,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 19967,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 19967,
"num_whiteouts": 0,
"num_read": 938,
"num_read_kb": 288244,
"num_write": 111692,
"num_write_kb": 84295064,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 17,
"num_bytes_recovered": 71303168,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0,
"num_omap_bytes": 0,
"num_omap_keys": 0,
"num_objects_repaired": 0
},
"up": [
25,
117,
128,
95,
151,
15
],
"acting": [
25,
117,
128,
95,
151,
15
],
"avail_no_missing": [
"25(0)",
"15(5)",
"95(3)",
"117(1)",
"128(2)"
],
"object_location_counts": [
{
"shards": "15(5),25(0),95(3),117(1),128(2)",
"objects": 19967
}
],
"blocked_by": [],
"up_primary": 25,
"acting_primary": 25,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 14679,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
{
"peer": "151(4)",
"pgid": "2.b9s4",
"last_update": "20278'445510",
"last_complete": "14671'111684",
"log_tail": "14173'104284",
"last_user_version": 111684,
"last_backfill": "MAX",
"purged_snaps": [],
"history": {
"epoch_created": 573,
"epoch_pool_created": 100,
"last_epoch_started": 14679,
"last_interval_started": 14678,
"last_epoch_clean": 14716,
"last_interval_clean": 14678,
"last_epoch_split": 573,
"last_epoch_marked_full": 0,
"same_up_since": 14678,
"same_interval_since": 14678,
"same_primary_since": 14396,
"last_scrub": "20278'444009",
"last_scrub_stamp":
"2020-09-08T16:57:22.430246+0000",
"last_deep_scrub": "20278'444009",
"last_deep_scrub_stamp":
"2020-09-08T16:57:22.430246+0000",
"last_clean_scrub_stamp":
"2020-09-07T06:34:26.320796+0000",
"prior_readable_until_ub": 0
},
"stats": {
"version": "14671'111684",
"reported_seq": "127482",
"reported_epoch": "14671",
"state": "active+clean",
"last_fresh": "2020-08-06T19:31:36.499168+0000",
"last_change": "2020-08-06T19:28:38.923454+0000",
"last_active": "2020-08-06T19:31:36.499168+0000",
"last_peered": "2020-08-06T19:31:36.499168+0000",
"last_clean": "2020-08-06T19:31:36.499168+0000",
"last_became_active":
"2020-08-06T19:28:15.372420+0000",
"last_became_peered":
"2020-08-06T19:28:15.372420+0000",
"last_unstale": "2020-08-06T19:31:36.499168+0000",
"last_undegraded": "2020-08-06T19:31:36.499168+0000",
"last_fullsized": "2020-08-06T19:31:36.499168+0000",
"mapping_epoch": 14678,
"log_start": "14173'104284",
"ondisk_log_start": "14173'104284",
"created": 573,
"last_epoch_clean": 14624,
"parent": "0.0",
"parent_split_bits": 10,
"last_scrub": "14341'106257",
"last_scrub_stamp":
"2020-08-06T00:08:25.447555+0000",
"last_deep_scrub": "14005'91363",
"last_deep_scrub_stamp":
"2020-08-04T13:36:30.857877+0000",
"last_clean_scrub_stamp":
"2020-08-06T00:08:25.447555+0000",
"log_size": 7400,
"ondisk_log_size": 7400,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 83704299520,
"num_objects": 19963,
"num_object_clones": 0,
"num_object_copies": 119778,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 19963,
"num_whiteouts": 0,
"num_read": 938,
"num_read_kb": 288244,
"num_write": 111684,
"num_write_kb": 84278680,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 17,
"num_bytes_recovered": 71303168,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0,
"num_omap_bytes": 0,
"num_omap_keys": 0,
"num_objects_repaired": 0
},
"up": [
25,
117,
128,
95,
151,
15
],
"acting": [
25,
117,
128,
95,
151,
15
],
"avail_no_missing": [],
"object_location_counts": [],
"blocked_by": [],
"up_primary": 25,
"acting_primary": 25,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 14679,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
}
],
"recovery_state": [
{
"name": "Started/Primary/Active",
"enter_time": "2020-08-06T19:35:02.502760+0000",
"might_have_unfound": [
{
"osd": "15(5)",
"status": "already probed"
},
{
"osd": "95(3)",
"status": "already probed"
},
{
"osd": "117(1)",
"status": "already probed"
},
{
"osd": "128(2)",
"status": "already probed"
},
{
"osd": "151(4)",
"status": "already probed"
}
],
"recovery_progress": {
"backfill_targets": [],
"waiting_on_backfill": [],
"last_backfill_started": "MIN",
"backfill_info": {
"begin": "MIN",
"end": "MIN",
"objects": []
},
"peer_backfill_info": [],
"backfills_in_flight": [],
"recovering": [],
"pg_backend": {
"recovery_ops": [],
"read_ops": []
}
}
},
{
"name": "Started",
"enter_time": "2020-08-06T19:35:01.477315+0000"
},
{
"scrubber.epoch_start": "14678",
"scrubber.active": false,
"scrubber.state": "INACTIVE",
"scrubber.start": "MIN",
"scrubber.end": "MIN",
"scrubber.max_end": "MIN",
"scrubber.subset_last_update": "0'0",
"scrubber.deep": false,
"scrubber.waiting_on_whom": []
}
],
"agent_state": {}
}
Every time we look at one of these inconsistencies, the OSD log reports the
same bad checksum value (got 0x6706be76):
debug 2020-08-13T18:39:01.731+0000 7fbc037a7700 -1
bluestore(/var/lib/ceph/osd/ceph-25) _verify_csum bad crc32c/0x1000
checksum at blob offset 0x0, got 0x6706be76, expected 0x61f2021c, device
location [0x12b403c0000~1000], logical extent 0x0~1000, object
2#2:0f1a338f:::rbd_data.3.20d195d612942.0000000001db869b:head#
This looks a lot like:
https://tracker.ceph.com/issues/22464
That said, we've got the following versions in play (cluster was created
with 15.2.3):
ceph version 15.2.4 (7447c15c6ff58d7fce91843b705a268a1917325c) octopus
(stable)
This is a containerized cephadm installation, in case it's relevant.
The distribution is Ubuntu 18.04.4; the kernel is the HWE kernel:
Linux ceph02 5.4.0-42-generic #46~18.04.1-Ubuntu SMP Fri Jul 10 07:21:24
UTC 2020 x86_64 x86_64 x86_64 GNU/Linux
A repair operation 'fixes' the affected PG. These errors are occurring across
many PGs, on different servers, and we see no indication of any
hardware-related issues.
Any ideas on what to do next?