Fix: consumer: put each chunk on teardown
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Wed, 14 Aug 2019 20:32:58 +0000 (16:32 -0400)
committerJérémie Galarneau <jeremie.galarneau@efficios.com>
Thu, 5 Sep 2019 00:00:57 +0000 (20:00 -0400)
Trace chunks in the registry may still exist because those are top-level
entities for which we may have received a "create" command from session
daemon without pairing "destroy". Iterate on the entire chunk registry
and put the sessiond reference for each chunk on consumerd teardown

Fixes the following assert on teardown caused by non-empty registry on
SIGINT:

lttng-consumerd: trace-chunk.c:1109: lttng_trace_chunk_registry_destroy: Assertion `!ret' failed.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
src/common/consumer/consumer.c
src/common/trace-chunk-registry.h
src/common/trace-chunk.c

index 82be751c40ef82e83e47e10e785caa0d131cffbe..a53b7383ec170b279211609043385c8104c6d861 100644 (file)
@@ -1408,6 +1408,7 @@ void lttng_consumer_cleanup(void)
 {
        struct lttng_ht_iter iter;
        struct lttng_consumer_channel *channel;
+       unsigned int trace_chunks_left;
 
        rcu_read_lock();
 
@@ -1432,6 +1433,27 @@ void lttng_consumer_cleanup(void)
         */
        lttng_ht_destroy(consumer_data.stream_list_ht);
 
+       /*
+        * Trace chunks in the registry may still exist if the session
+        * daemon has encountered an internal error and could not
+        * tear down its sessions and/or trace chunks properly.
+        *
+        * Release the session daemon's implicit reference to any remaining
+        * trace chunk and print an error if any trace chunk was found. Note
+        * that there are _no_ legitimate cases for trace chunks to be left,
+        * it is a leak. However, it can happen following a crash of the
+        * session daemon and not emptying the registry would cause an assertion
+        * to hit.
+        */
+       trace_chunks_left = lttng_trace_chunk_registry_put_each_chunk(
+                       consumer_data.chunk_registry);
+       if (trace_chunks_left) {
+               ERR("%u trace chunks are leaked by lttng-consumerd. "
+                               "This can be caused by an internal error of the session daemon.",
+                               trace_chunks_left);
+       }
+       /* Run all callbacks freeing each chunk. */
+       rcu_barrier();
        lttng_trace_chunk_registry_destroy(consumer_data.chunk_registry);
 }
 
index 721453a2e0d03279aa7b71f2d6d0b8a64681857b..23df91a3eca24a13ac64b0eb3a0a5d9b983d5ebf 100644 (file)
@@ -95,4 +95,8 @@ lttng_trace_chunk_registry_find_anonymous_chunk(
                const struct lttng_trace_chunk_registry *registry,
                uint64_t session_id);
 
+LTTNG_HIDDEN
+unsigned int lttng_trace_chunk_registry_put_each_chunk(
+               struct lttng_trace_chunk_registry *registry);
+
 #endif /* LTTNG_TRACE_CHUNK_REGISTRY_H */
index a183fbeb0ff18d6aa969d984685228c0691cc46e..dbab99d192876a73ce49be5975b674f55ec4e60e 100644 (file)
@@ -1293,3 +1293,49 @@ lttng_trace_chunk_registry_find_anonymous_chunk(
         return _lttng_trace_chunk_registry_find_chunk(registry,
                        session_id, NULL);
 }
+
+unsigned int lttng_trace_chunk_registry_put_each_chunk(
+               struct lttng_trace_chunk_registry *registry)
+{
+       struct cds_lfht_iter iter;
+       struct lttng_trace_chunk_registry_element *chunk_element;
+       unsigned int trace_chunks_left = 0;
+
+       DBG("Releasing trace chunk registry to all trace chunks");
+       rcu_read_lock();
+       cds_lfht_for_each_entry(registry->ht,
+                       &iter, chunk_element, trace_chunk_registry_ht_node) {
+               const char *chunk_id_str = "none";
+               char chunk_id_buf[MAX_INT_DEC_LEN(uint64_t)];
+
+               pthread_mutex_lock(&chunk_element->chunk.lock);
+               if (chunk_element->chunk.id.is_set) {
+                       int fmt_ret;
+
+                       fmt_ret = snprintf(chunk_id_buf, sizeof(chunk_id_buf),
+                                       "%" PRIu64,
+                                       chunk_element->chunk.id.value);
+                       if (fmt_ret < 0 || fmt_ret >= sizeof(chunk_id_buf)) {
+                               chunk_id_str = "formatting error";
+                       } else {
+                               chunk_id_str = chunk_id_buf;
+                       }
+               }
+
+               DBG("Releasing reference to trace chunk: session_id = %" PRIu64
+                               "chunk_id = %s, name = \"%s\", status = %s",
+                               chunk_element->session_id,
+                               chunk_id_str,
+                               chunk_element->chunk.name ? : "none",
+                               chunk_element->chunk.close_command.is_set ?
+                                               "open" : "closed");
+               pthread_mutex_unlock(&chunk_element->chunk.lock);
+               lttng_trace_chunk_put(&chunk_element->chunk);
+               trace_chunks_left++;
+       }
+       rcu_read_unlock();
+       DBG("Released reference to %u trace chunks in %s()", trace_chunks_left,
+                       __FUNCTION__);
+
+       return trace_chunks_left;
+}
This page took 0.02829 seconds and 4 git commands to generate.