From 2a6ebf6bdc5c9b8855def9b0fa3fda6d66c83946 Mon Sep 17 00:00:00 2001 From: Jonathan Rajotte Date: Mon, 28 Mar 2022 16:49:17 -0400 Subject: [PATCH] Fix: sessiond: assertion hit in ltt_sessions_ht_empty MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Observed issue ============== Scenario: gdb lttng-sessiond set non-stop break rotation-thread.cpp:584 ^ simulates a slow rotation thread or not scheduled thread. lttng create test1 lttng enable-event -u -a lttng start test1 lttng create test2 lttng enable-event -u -a lttng start test2 lttng destroy test1 This will hang on rotation pending checks on the CLI side. In another shell: lttng destroy test2 This will hang on rotation pending checks on the CLI side. Back to gdb thread 7 continue Results in: #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 #1 0x00007ffff786c859 in __GI_abort () at abort.c:79 #2 0x00007ffff786c729 in __assert_fail_base (fmt=0x7ffff7a02588 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5555556bb148 "count == lttng_ht_get_count(ltt_sessions_ht_by_name)", file=0x5555556bae9f "session.cpp", line=395, function=) at assert.c:92 #3 0x00007ffff787e006 in __GI___assert_fail (assertion=0x5555556bb148 "count == lttng_ht_get_count(ltt_sessions_ht_by_name)", file=0x5555556bae9f "session.cpp", line=395, function=0x5555556bb129 "int ltt_sessions_ht_empty()") at assert.c:101 #4 0x0000555555586d59 in ltt_sessions_ht_empty () at session.cpp:395 #5 0x0000555555586e53 in del_session_ht (ls=0x7fffdc000c30) at session.cpp:418 #6 0x0000555555588a95 in session_release (ref=0x7fffdc001e50) at session.cpp:999 #7 0x000055555558620f in urcu_ref_put (ref=0x7fffdc001e50, release=0x5555555886eb ) at /home/joraj/lttng/master/install/include/urcu/ref.h:68 #8 0x0000555555588c8f in session_put (session=0x7fffdc000c30) at session.cpp:1048 #9 0x00005555555bf995 in handle_job_queue (handle=0x55555575d260, state=0x7fffeeffc240, queue=0x555555758960) at rotation-thread.cpp:612 #10 0x00005555555c05da in thread_rotation (data=0x55555575d260) at rotation-thread.cpp:847 #11 0x00005555555c3b1c in launch_thread (data=0x55555575d2f0) at thread.cpp:66 #12 0x00007ffff7a46609 in start_thread (arg=) at pthread_create.c:477 #13 0x00007ffff7969163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95 Other scenarios can lead to a similar backtrace when using the `--no-wait` lttng destroy option. Cause ===== Since ed41e5709047ef545aa28082416e641e003b45e0 [1], hash table removal for the session object for the `ltt_sessions_ht_by_name` and `ltt_sessions_ht_by_name` are "decoupled". Removal from `ltt_sessions_ht_by_name` is done early in `session_destroy()` while removal from `ltt_sessions_ht_by_id` is done during `session_release` when the last reference of a session object is released. This can leads to `imbalances` between the size of the two hash tables when multiple sessions are at play. Solution ======== Rework `ltt_sessions_ht_empty()` to exit early when `ltt_sessions_ht_by_id` is not empty. Perform a sanity check on `ltt_sessions_ht_by_name` only when `ltt_sessions_ht_by_id` is empty. Note ======== Ideally both hash tables' lifetime would be managed separately but it seems easier in term of initialization to bundle them together for now considering the limited scope of the `ltt_sessions_ht_by_name` hash table. Signed-off-by: Jonathan Rajotte Signed-off-by: Jérémie Galarneau Change-Id: I66c459f80298f929add703ac977cccd1da6dd556 --- src/bin/lttng-sessiond/session.cpp | 35 ++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/src/bin/lttng-sessiond/session.cpp b/src/bin/lttng-sessiond/session.cpp index fc43684e9..304589068 100644 --- a/src/bin/lttng-sessiond/session.cpp +++ b/src/bin/lttng-sessiond/session.cpp @@ -377,24 +377,45 @@ end: /* * Test if ltt_sessions_ht_by_id/name are empty. - * Return 1 if empty, 0 if not empty. + * Return `false` if hash table objects are null. * The session list lock must be held. */ -static int ltt_sessions_ht_empty(void) +static bool ltt_sessions_ht_empty(void) { - unsigned long count; + bool empty = false; if (!ltt_sessions_ht_by_id) { - count = 0; + /* The hash tables do not exist yet. */ goto end; } LTTNG_ASSERT(ltt_sessions_ht_by_name); - count = lttng_ht_get_count(ltt_sessions_ht_by_id); - LTTNG_ASSERT(count == lttng_ht_get_count(ltt_sessions_ht_by_name)); + if (lttng_ht_get_count(ltt_sessions_ht_by_id) == 0) { + /* Not empty.*/ + goto end; + } + + /* + * At this point it is expected that the `ltt_sessions_ht_by_name` ht is + * empty. + * + * The removal from both hash tables is done in two different + * places: + * - removal from `ltt_sessions_ht_by_name` is done during + * `session_destroy()` + * - removal from `ltt_sessions_ht_by_id` is done later + * in `session_release()` on the last reference put. + * + * This means that it is possible for `ltt_sessions_ht_by_name` to be + * empty but for `ltt_sessions_ht_by_id` to still contain objects when + * multiple sessions exists. The reverse is false, hence this sanity + * check. + */ + LTTNG_ASSERT(lttng_ht_get_count(ltt_sessions_ht_by_name) == 0); + empty = true; end: - return count ? 0 : 1; + return empty; } /* -- 2.34.1