fix: relayd: unaligned access in trace_chunk_registry_ht_key_hash master
authorMichael Jeanson <mjeanson@efficios.com>
Tue, 30 Apr 2024 19:17:52 +0000 (15:17 -0400)
committerJérémie Galarneau <jeremie.galarneau@efficios.com>
Wed, 1 May 2024 14:56:42 +0000 (10:56 -0400)
In 328c2fe7297c941aa9cbcfa4ce944fca1bd7300f, the type of 'lttng_uuid'
was changed from a C array of 16 'uint8_t' to a C++ std::array of the
same type and length.

In 'trace_chunk_registry_ht_key_hash()' we access these 16 bytes as 2
'uint64_t', to do so we used to cast the array to '(uint64_t *)' and
then access index 0 and 1.

When it was converted to C++, an error was introduced where instead we
reinterpret_cast to 'const uint64_t *' the index 0 and 1 of the array
which results in a 'uint64_t' pointer to the first and second bytes of
the array. These values overlap but since they are used as keys for an
hash table it still works. However, on platforms that don't allow
unaligned access, the second pointer being only offset by one byte
results in a 'Bus error'.

Reintroduce the old behavior by applying the index 0 and 1 to the
pointer resulting from the reinterpret_cast.

Change-Id: I2bc287edbe6864a2a870f9de1f3b4dd8f8a90ace
Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
14 files changed:
.gitignore
.vscode/build.sh [new file with mode: 0755]
.vscode/launch.json [new file with mode: 0644]
.vscode/libtool_gdb_wrapper.sh [new file with mode: 0755]
.vscode/tasks.json [new file with mode: 0644]
README.adoc
src/bin/lttng-relayd/sessiond-trace-chunks.cpp
src/common/consumer/consumer.cpp
src/common/consumer/consumer.hpp
src/common/event-rule/kernel-syscall.cpp
tests/regression/Makefile.am
tests/regression/tools/live/Makefile.am
tests/regression/tools/live/test_per_application_leaks.py [new file with mode: 0755]
tests/utils/lttngtest/environment.py

index 4c5a4e5e9999f9b0dbf86be3eb07f51df7a0983d..b0884eb16f858d8d970b3ba478048fc3b1b97e31 100644 (file)
@@ -47,7 +47,6 @@ TAGS
 /.clangd/
 compile_commands.json
 *_flymake*
-/.vscode/*
 
 # m4 macros not automatically generated
 /m4/libtool.m4
diff --git a/.vscode/build.sh b/.vscode/build.sh
new file mode 100755 (executable)
index 0000000..9a4e362
--- /dev/null
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Copyright (C) 2024 Jérémie Galarneau <jeremie.galarneau@efficios.com>
+#
+# SPDX-License-Identifier: LGPL-2.1-only
+#
+
+source_dir="$1"
+
+# Run make quietly to check if a Makefile exists
+make_output=$(make -C "$source_dir" -q 2>&1)
+make_exit_status=$?
+
+# Check the return status of make -q
+if [ $make_exit_status -eq 2 ]; then
+    # It seems the Makefiles don't exist. Most likely the user forgot to
+    # setup their tree.
+    echo "$make_output"
+    echo -e "\033[33mMake couldn't find a Makefile: did you run ./bootstrap and ./configure ?\033[0m"
+    exit 1
+fi
+
+# Check if compile_commands.json does not exist in the source directory and if bear is installed
+if [ ! -f "$source_dir/compile_commands.json" ] && which bear >/dev/null 2>&1; then
+    # Bear is installed and compile_commands.json is not present
+    # Perform a make clean since compile_commands.json is missing and bear is installed
+    make -C "$source_dir" clean
+
+    # Prefix bear to the make command
+    command_prefix="bear -- "
+fi
+
+# Run make with or without bear prefix, depending on the condition above
+eval "${command_prefix}"make -C "$source_dir" -j "$(nproc)"
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644 (file)
index 0000000..4541842
--- /dev/null
@@ -0,0 +1,76 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Debug LTTng Client",
+            "type": "cppdbg",
+            "request": "launch",
+            "program": "${workspaceFolder}/src/bin/lttng/.libs/lttng",
+            // Replace with your args
+            "args": [
+                "help"
+            ],
+            "stopAtEntry": false,
+            "cwd": "${workspaceFolder}",
+            "environment": [],
+            "externalConsole": false,
+            "MIMode": "gdb",
+            "miDebuggerPath": "${workspaceFolder}/.vscode/libtool_gdb_wrapper.sh",
+            "setupCommands": [
+                {
+                    "description": "Enable pretty-printing for gdb",
+                    "text": "-enable-pretty-printing",
+                    "ignoreFailures": true
+                }
+            ],
+            "preLaunchTask": "Build LTTng-tools"
+        },
+        {
+            "name": "Debug LTTng Session Daemon",
+            "type": "cppdbg",
+            "request": "launch",
+            "program": "${workspaceFolder}/src/bin/lttng-sessiond/.libs/lttng-sessiond",
+            "args": [],
+            "stopAtEntry": false,
+            "cwd": "${workspaceFolder}",
+            // The session daemon fails to launch if it can't find the session schema description
+            "environment": [
+                {
+                    "name": "LTTNG_SESSION_CONFIG_XSD_PATH",
+                    "value": "${workspaceFolder}/src/common/"
+                }
+            ],
+            "externalConsole": false,
+            "MIMode": "gdb",
+            "miDebuggerPath": "${workspaceFolder}/.vscode/libtool_gdb_wrapper.sh",
+            "setupCommands": [
+                {
+                    "description": "Enable pretty-printing for gdb",
+                    "text": "-enable-pretty-printing",
+                    "ignoreFailures": true
+                }
+            ],
+            "preLaunchTask": "Build LTTng-tools"
+        },
+        {
+            "name": "Debug LTTng Relay Daemon",
+            "type": "cppdbg",
+            "request": "launch",
+            "program": "${workspaceFolder}/src/bin/lttng-relayd/lttng-relayd",
+            "args": [],
+            "cwd": "${workspaceFolder}",
+            "environment": [],
+            "externalConsole": false,
+            "MIMode": "gdb",
+            "miDebuggerPath": "${workspaceFolder}/.vscode/libtool_gdb_wrapper.sh",
+            "setupCommands": [
+                {
+                    "description": "Enable pretty-printing for gdb",
+                    "text": "-enable-pretty-printing",
+                    "ignoreFailures": true
+                }
+            ],
+            "preLaunchTask": "Build LTTng-tools"
+        },
+    ]
+}
\ No newline at end of file
diff --git a/.vscode/libtool_gdb_wrapper.sh b/.vscode/libtool_gdb_wrapper.sh
new file mode 100755 (executable)
index 0000000..0f60c83
--- /dev/null
@@ -0,0 +1,9 @@
+#!/usr/bin/env sh
+# Copyright (C) 2024 Jérémie Galarneau <jeremie.galarneau@efficios.com>
+#
+# SPDX-License-Identifier: LGPL-2.1-only
+#
+# Wrapper script to setup the environment before invoking gdb
+# on the in-tree binaries (under `.libs`)
+
+libtool --mode=execute gdb "$@"
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
new file mode 100644 (file)
index 0000000..9f28bc5
--- /dev/null
@@ -0,0 +1,21 @@
+{
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "type": "shell",
+            "label": "Build LTTng-tools",
+            // Assumes you ran ./bootstrap and ./configure with your preferred options
+            "command": "${workspaceFolder}/.vscode/build.sh ${workspaceFolder}",
+            "options": {
+                "cwd": "${workspaceFolder}"
+            },
+            "problemMatcher": [
+                "$gcc"
+            ],
+            "group": {
+                "kind": "build",
+                "isDefault": true
+            }
+        }
+    ]
+}
\ No newline at end of file
index 6ec60856a1419b28e202b0afd4fcf1baba0b8244..676467aa4cfbe09499994f8232865c24c33666d0 100644 (file)
@@ -221,6 +221,21 @@ As there's no official {lib} Python bindings yet, see
 link:doc/python-howto.txt[`doc/python-howto.txt`] to understand how to
 use them.
 
+== Supported versions
+
+The LTTng project supports the last two released stable versions
+(e.g. stable-2.13 and stable-2.12).
+
+Fixes are backported from the master branch to the last stable version unless
+those fixes would break the ABI or API. Those fixes may be backported to the
+second-last stable version, depending on complexity and ABI/API compatibility.
+
+Security fixes are backported from the master branch to both of the last stable
+version and the second-last stable version.
+
+New features are integrated into the master branch and not backported to the
+last stable branch.
+
 == Community
 
 Mailing list::
index 9d793c9bcf79315e722447fcb3c205f785a60c54..2326878fb9fd6a9dd2f9d6dfd8dc8dd4504243f1 100644 (file)
@@ -78,8 +78,8 @@ struct trace_chunk_registry_ht_element {
 
 static unsigned long trace_chunk_registry_ht_key_hash(const struct trace_chunk_registry_ht_key *key)
 {
-       const uint64_t uuid_h1 = *reinterpret_cast<const uint64_t *>(&key->sessiond_uuid[0]);
-       const uint64_t uuid_h2 = *reinterpret_cast<const uint64_t *>(&key->sessiond_uuid[1]);
+       const uint64_t uuid_h1 = reinterpret_cast<const uint64_t *>(key->sessiond_uuid.data())[0];
+       const uint64_t uuid_h2 = reinterpret_cast<const uint64_t *>(key->sessiond_uuid.data())[1];
 
        return hash_key_u64(&uuid_h1, lttng_ht_seed) ^ hash_key_u64(&uuid_h2, lttng_ht_seed);
 }
index ed844f8dfd8a4d27826ea6b8644d04fc244b0014..1da243601cd7ff3c72fc8febb5d6c11f2f150c4b 100644 (file)
@@ -178,13 +178,6 @@ static void clean_channel_stream_list(struct lttng_consumer_channel *channel)
 
        /* Delete streams that might have been left in the stream list. */
        cds_list_for_each_entry_safe (stream, stmp, &channel->streams.head, send_node) {
-               /*
-                * Once a stream is added to this list, the buffers were created so we
-                * have a guarantee that this call will succeed. Setting the monitor
-                * mode to 0 so we don't lock nor try to delete the stream from the
-                * global hash table.
-                */
-               stream->monitor = 0;
                consumer_stream_destroy(stream, nullptr);
        }
 }
index 310969172d73f656a258d038cfdb1c0c55c9441f..851b3e2b8137eaef4d5904b19deb81a3f9921bdf 100644 (file)
@@ -922,7 +922,7 @@ void lttng_consumer_set_command_sock_path(struct lttng_consumer_local_data *ctx,
  * on error.
  */
 int lttng_consumer_send_error(struct lttng_consumer_local_data *ctx,
-               enum lttcomm_return_code error_code);
+                             enum lttcomm_return_code error_code);
 
 /*
  * Called from signal handler to ensure a clean exit.
index 23a432aacaaff285e6f0e3cb735d8cb1b4b3f26a..933c3543fcc155d5e74e3310089397155d9d4a76 100644 (file)
@@ -133,6 +133,10 @@ static bool lttng_event_rule_kernel_syscall_is_equal(const struct lttng_event_ru
                goto end;
        }
 
+       if (a->emission_site != b->emission_site) {
+               goto end;
+       }
+
        is_equal = true;
 end:
        return is_equal;
index ca3fb2d52d281968f9ffb9daeca2e47035c4bb53..da5cb1b57c1d88b96cac9f178599bceb50ee7b7d 100644 (file)
@@ -16,6 +16,7 @@ TESTS = tools/base-path/test_ust \
        tools/health/test_thread_ok \
        tools/live/test_kernel \
        tools/live/test_lttng_kernel \
+       tools/live/test_per_application_leaks.py \
        tools/live/test_ust \
        tools/live/test_ust_tracefile_count \
        tools/live/test_lttng_ust \
index 494b982df1989ec0d4698b39f443f7b41c8176cd..3ad9b7ba35f23e4b5a296c3dc7391acbc6d7e430 100644 (file)
@@ -6,7 +6,7 @@ LIBTAP=$(top_builddir)/tests/utils/tap/libtap.la
 LIBLTTNG_SESSIOND_COMMON=$(top_builddir)/src/bin/lttng-sessiond/liblttng-sessiond-common.la
 
 noinst_PROGRAMS = live_test
-EXTRA_DIST = test_kernel test_lttng_kernel
+EXTRA_DIST = test_kernel test_lttng_kernel test_per_application_leaks.py
 
 if HAVE_LIBLTTNG_UST_CTL
 EXTRA_DIST += test_ust test_ust_tracefile_count test_lttng_ust
diff --git a/tests/regression/tools/live/test_per_application_leaks.py b/tests/regression/tools/live/test_per_application_leaks.py
new file mode 100755 (executable)
index 0000000..1d12049
--- /dev/null
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+#
+# SPDX-FileCyoprightText: Kienan Stewart <kstewart@efficios.com>
+# SPDX-License-Identifier: GPL-2.0-only
+
+"""
+Test that the consumerd doesn't leak file descriptor allocations in /dev/shm
+when the relayd exits before instrumented applications start.
+
+@see https://bugs.lttng.org/issues/1411
+"""
+
+import os
+import pathlib
+import subprocess
+import sys
+
+test_utils_import_path = pathlib.Path(__file__).absolute().parents[3] / "utils"
+sys.path.append(str(test_utils_import_path))
+
+import lttngtest
+
+
+def get_consumerd_pid(tap, parent, match_string):
+    pid = 0
+    try:
+        process = subprocess.Popen(
+            ["pgrep", "-P", str(parent), "-f", match_string],
+            stdout=subprocess.PIPE,
+        )
+        process.wait()
+        output = str(process.stdout.read(), encoding="UTF-8").splitlines()
+        if len(output) != 1:
+            raise Exception(
+                "Unexpected number of output lines (got {}): {}".format(
+                    len(output), output
+                )
+            )
+        pid = int(output[0])
+    except Exception as e:
+        tap.diagnostic(
+            "Failed to find child process of '{}' matching '{}': '{}'".format(
+                parent, match_string, str(e)
+            )
+        )
+    return pid
+
+
+def count_process_dev_shm_fds(pid):
+    count = 0
+    if pid == 0:
+        return count
+    dir = os.path.join("/proc", str(pid), "fd")
+    for root, dirs, files in os.walk(dir):
+        for f in files:
+            filename = pathlib.Path(os.path.join(root, f))
+            try:
+                if filename.is_symlink() and str(filename.resolve()).startswith(
+                    "/dev/shm/shm-ust-consumer"
+                ):
+                    count += 1
+            except FileNotFoundError:
+                # As we're walking /proc/XX/fd/, fds may be added or removed
+                continue
+    return count
+
+
+def count_dev_shm_fds(tap, test_env):
+    consumer32_pid = get_consumerd_pid(tap, test_env._sessiond.pid, "ustconsumerd32")
+    fds_consumerd32 = count_process_dev_shm_fds(consumer32_pid)
+    consumer64_pid = get_consumerd_pid(tap, test_env._sessiond.pid, "ustconsumerd64")
+    fds_consumerd64 = count_process_dev_shm_fds(consumer64_pid)
+    return (fds_consumerd32, fds_consumerd64)
+
+
+def test_fd_leak(tap, test_env, buffer_sharing_policy, kill_relayd=True):
+    tap.diagnostic(
+        "test_fd_leak with buffer sharing policy {}, kill relayd: {}".format(
+            buffer_sharing_policy, kill_relayd
+        )
+    )
+    client = lttngtest.LTTngClient(test_env, log=tap.diagnostic)
+    output = lttngtest.NetworkSessionOutputLocation(
+        "net://localhost:{}:{}/".format(
+            test_env.lttng_relayd_control_port, test_env.lttng_relayd_data_port
+        )
+    )
+
+    session = client.create_session(output=output, live=True)
+    channel = session.add_channel(
+        lttngtest.lttngctl.TracingDomain.User,
+        buffer_sharing_policy=buffer_sharing_policy,
+    )
+    channel.add_recording_rule(lttngtest.lttngctl.UserTracepointEventRule())
+    session.start()
+
+    count_post_start = count_dev_shm_fds(tap, test_env)
+
+    # Kill the relayd
+    if kill_relayd:
+        test_env._terminate_relayd()
+
+    test_env.launch_wait_trace_test_application(10)
+    count_post_app1 = count_dev_shm_fds(tap, test_env)
+
+    test_env.launch_wait_trace_test_application(10)
+    count_post_app2 = count_dev_shm_fds(tap, test_env)
+
+    test_env.launch_wait_trace_test_application(10)
+    count_post_app3 = count_dev_shm_fds(tap, test_env)
+
+    session.stop()
+    session.destroy()
+
+    count_post_destroy = count_dev_shm_fds(tap, test_env)
+
+    tap.diagnostic(
+        "FD counts post-start: {}, post-destroy: {}".format(
+            count_post_start, count_post_destroy
+        )
+    )
+    tap.test(
+        count_post_start == count_post_destroy,
+        "Count of consumerd FDs in /dev/shm are equal after session start then after destroy",
+    )
+
+    tap.diagnostic(
+        "FD counts post-app-1: {}, post-app-2: {}, post-app-3: {}".format(
+            count_post_app1, count_post_app2, count_post_app3
+        )
+    )
+    if buffer_sharing_policy == lttngtest.lttngctl.BufferSharingPolicy.PerUID:
+        tap.test(
+            (count_post_app1 == count_post_app2)
+            and (count_post_app2 == count_post_app3),
+            "Count of consumerd FDs in /dev/shm doesn't leak over several application invocations",
+        )
+    else:
+        tap.skip(
+            "Count of consumerds FDs in /dev/shm doesn't leak over several application invocations - no mechanism is available to guarantee buffer reclamation within a given time frame"
+        )
+
+
+tap = lttngtest.TapGenerator(8)
+for kill_relayd in [True, False]:
+    for buffer_sharing_policy in [
+        lttngtest.lttngctl.BufferSharingPolicy.PerUID,
+        lttngtest.lttngctl.BufferSharingPolicy.PerPID,
+    ]:
+        with lttngtest.test_environment(
+            log=tap.diagnostic, with_relayd=True, with_sessiond=True
+        ) as test_env:
+            test_fd_leak(tap, test_env, buffer_sharing_policy, kill_relayd)
+
+sys.exit(0 if tap.is_successful else 1)
index 74dcfb8d26cebf89c66fbd187d77e4eef1a0dcc6..73d18874b293f1fa23e7c7e2dd267b33e81dd149 100644 (file)
@@ -618,14 +618,20 @@ class _Environment(logger._Logger):
         Launch an application that will trace from within constructors.
         """
         return _TraceTestApplication(
-            self._project_root
-            / "tests"
-            / "utils"
-            / "testapp"
-            / subpath,
+            self._project_root / "tests" / "utils" / "testapp" / subpath,
             self,
         )
 
+    def _terminate_relayd(self):
+        if self._relayd and self._relayd.poll() is None:
+            self._relayd.terminate()
+            self._relayd.wait()
+            if self._relayd_output_consumer:
+                self._relayd_output_consumer.join()
+                self._relayd_output_consumer = None
+            self._log("Relayd killed")
+            self._relayd = None
+
     # Clean-up managed processes
     def _cleanup(self):
         # type: () -> None
@@ -646,14 +652,7 @@ class _Environment(logger._Logger):
             self._log("Session daemon killed")
             self._sessiond = None
 
-        if self._relayd and self._relayd.poll() is None:
-            self._relayd.terminate()
-            self._relayd.wait()
-            if self._relayd_output_consumer:
-                self._relayd_output_consumer.join()
-                self._relayd_output_consumer = None
-            self._log("Relayd killed")
-            self._relayd = None
+        self._terminate_relayd()
 
         self._lttng_home = None
 
This page took 0.03271 seconds and 4 git commands to generate.