Hi all,
I recently deployed Ceph 15.2.8 on 7 hosts in total: 3 running mon, mgr,
rgw and mds, and 4 running osd. However, the mgr crashes a few times a
week, and the crashing mgr can be any one of the 3. I couldn’t identify the
underlying problem, so I have included the crash info below. I would
appreciate any suggestions that could help me narrow it down.
Thank you very much.
{
"assert_condition": "ret == 0",
"assert_file":
"/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/15.2.8/rpm/el8/BUILD/ceph-15.2.8/src/common/Thread.cc",
"assert_func": "void Thread::create(const char*, size_t)",
"assert_line": 157,
"assert_msg":
"/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/15.2.8/rpm/el8/BUILD/ceph-15.2.8/src/common/Thread.cc:
In function 'void Thread::create(const char*, size_t)' thread 7f833addc700
time
2021-02-10T20:00:32.980508+0000\n/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/15.2.8/rpm/el8/BUILD/ceph-15.2.8/src/common/Thread.cc:
157: FAILED ceph_assert(ret == 0)\n",
"assert_thread_name": "mgr-fin",
"backtrace": [
"(()+0x12b20) [0x7f835a51cb20]",
"(gsignal()+0x10f) [0x7f8358f6d7ff]",
"(abort()+0x127) [0x7f8358f57c35]",
"(ceph::__ceph_assert_fail(char const*, char const*, int, char
const*)+0x1a9) [0x7f835c07b735]",
"(()+0x27a8fe) [0x7f835c07b8fe]",
"(()+0x34cef6) [0x7f835c14def6]",
"(DispatchQueue::start()+0x3a) [0x7f835c29697a]",
"(AsyncMessenger::ready()+0xcd) [0x7f835c3340cd]",
"(Messenger::add_dispatcher_head(Dispatcher*)+0x68)
[0x7f835c3f8478]",
"(MonClient::get_monmap_and_config()+0xbb) [0x7f835c3f66ab]",
"(ceph_mount_info::init()+0x4d) [0x7f834298435d]",
"(()+0x3680f) [0x7f8342cd280f]",
"(()+0x19d421) [0x7f835ba5c421]",
"(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
"(()+0x179c78) [0x7f835ba38c78]",
"(()+0x19d1c7) [0x7f835ba5c1c7]",
"(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
"(()+0x179c78) [0x7f835ba38c78]",
"(()+0x19d1c7) [0x7f835ba5c1c7]",
"(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
"(()+0x1221d4) [0x7f835b9e11d4]",
"(()+0x122c55) [0x7f835b9e1c55]",
"(()+0x19cf27) [0x7f835ba5bf27]",
"(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
"(_PyFunction_FastCallDict()+0x122) [0x7f835b9b9ec2]",
"(_PyObject_FastCallDict()+0x70e) [0x7f835b9bac9e]",
"(()+0x10dc70) [0x7f835b9ccc70]",
"(_PyObject_FastCallDict()+0x6ec) [0x7f835b9bac7c]",
"(PyObject_CallFunctionObjArgs()+0xe8) [0x7f835b9dbd48]",
"(_PyEval_EvalFrameDefault()+0x2588) [0x7f835ba5eef8]",
"(()+0xf99b4) [0x7f835b9b89b4]",
"(()+0x179e60) [0x7f835ba38e60]",
"(()+0x19d1c7) [0x7f835ba5c1c7]",
"(_PyEval_EvalFrameDefault()+0x10d5) [0x7f835ba5da45]",
"(()+0x179c78) [0x7f835ba38c78]",
"(()+0x19d1c7) [0x7f835ba5c1c7]",
"(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
"(()+0xfa326) [0x7f835b9b9326]",
"(()+0x179e60) [0x7f835ba38e60]",
"(()+0x19d1c7) [0x7f835ba5c1c7]",
"(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
"(()+0x179c78) [0x7f835ba38c78]",
"(()+0x19d1c7) [0x7f835ba5c1c7]",
"(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
"(_PyFunction_FastCallDict()+0x122) [0x7f835b9b9ec2]",
"(_PyObject_FastCallDict()+0x70e) [0x7f835b9bac9e]",
"(()+0x10dc70) [0x7f835b9ccc70]",
"(PyObject_Call()+0x4b) [0x7f835b9c1acb]",
"(PyObject_CallMethod()+0x10b) [0x7f835ba5ac6b]",
"(ActivePyModule::handle_command(ModuleCommand const&, MgrSession
const&, std::map<std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >, boost::variant<std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> >, bool, long, double,
std::vector<std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> > > >,
std::vector<long,
std::allocator<long> >, std::vector<double, std::allocator<double> >
>,
std::less<void>, std::allocator<std::pair<std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> > const,
boost::variant<std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >, bool, long, double,
std::vector<std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> > > >,
std::vector<long,
std::allocator<long> >, std::vector<double, std::allocator<double> >
> > >
const&, ceph::buffer::v15_2_0::list const&,
std::__cxx11::basic_stringstream<char, std::char_traits<char>,
std::allocator<char> >*, std::__cxx11::basic_stringstream<char,
std::char_traits<char>, std::allocator<char> >*)+0x222)
[0x55bc0b8a0cb2]",
"(()+0x1b0fdd) [0x55bc0b8f5fdd]",
"(Context::complete(int)+0xd) [0x55bc0b8b0bdd]",
"(Finisher::finisher_thread_entry()+0x1a5) [0x7f835c10b465]",
"(()+0x814a) [0x7f835a51214a]",
"(clone()+0x43) [0x7f8359032f23]"
],
"ceph_version": "15.2.8",
"crash_id":
"2021-02-10T20:00:32.989661Z_201fd5fb-6e0a-4b50-8a95-fdf9ed9aeb81",
"entity_name": "mgr.sds01-cp.cwcxek",
"os_id": "centos",
"os_name": "CentOS Linux",
"os_version": "8",
"os_version_id": "8",
"process_name": "ceph-mgr",
"stack_sig":
"e1c15d685283e7598b128a37a328ba86ec433dfef97597ac9453b5d52608feda",
"timestamp": "2021-02-10T20:00:32.989661Z",
"utsname_hostname": "sds01-cp",
"utsname_machine": "x86_64",
"utsname_release": "4.18.0-240.10.1.el8_3.x86_64",
"utsname_sysname": "Linux",
"utsname_version": "#1 SMP Wed Dec 16 03:30:52 EST 2020"
}