Fix: tests: `pgrep -f` flags unrelated process as lttng-sessiond
authorJonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Fri, 22 May 2020 14:36:46 +0000 (10:36 -0400)
committerJérémie Galarneau <jeremie.galarneau@efficios.com>
Tue, 11 Aug 2020 20:51:51 +0000 (16:51 -0400)
Observed issue
==============

lttng-sessiond is not started by start_lttng_sessiond_opt and a vim
process is killed on stop_lttng_sessiond_opt.

Cause
=====
We uses "pgrep -f" with the interested pattern to gather the pids that
should be the lttng processes we are interested in. `pgrep -f` yields
false positives since it matches against the complete cmdline including
parent directory of the command and all arguments.

For example, the following will currently match for the sessiond
pattern:
 vim src/bin/lttng-sessiond/notification-thread-internal.h

This prevents the launch of sessiond by start_lttng_sessiond_opt and end
up killing the vim process on stop_lttng_sessiond_opt.

Solution
========
To alleviate this, we propose a two stage lookup. The first stage uses
"pgrep -f" yielding potential candidates. The second stage performs
grep on the basename of the first field of the /proc/[pid]/cmdline
for each pid candidates.

The first field of /proc/[pid]/cmdline corresponds to the actual command.
We use the basename to ensure that we do not match on the path to the
executable.

Known drawbacks
=========
None

References
==========

https://review.lttng.org/c/lttng-tools/+/3043

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Change-Id: I479ebad27f4965ae16d4442a6fe58ff3157d76fa

tests/regression/tools/working-directory/test_relayd_working_directory
tests/stress/test_multi_sessions_per_uid_10app
tests/stress/test_multi_sessions_per_uid_5app_streaming
tests/stress/test_multi_sessions_per_uid_5app_streaming_kill_relayd
tests/utils/utils.sh

index 0a5bad027bb8f0b995ad8b6451e49299423c1460..24b87a9cc1eb15003e72232778dc4d9bc656aa26 100755 (executable)
@@ -67,7 +67,7 @@ function test_relayd_daemon()
 
        start_lttng_relayd_opt 1 "-d" "--working-directory $working_dir"
 
-       pid=$(pgrep "$RELAYD_MATCH")
+       pid=$(lttng_pgrep "$RELAYD_MATCH")
        ok $? "Found lttng-relayd"
 
        cwd=$(readlink "/proc/${pid}/cwd")
@@ -88,7 +88,7 @@ function test_relayd_daemon_no_working_dir()
 
        start_lttng_relayd_opt 1 "-d" ""
 
-       pid=$(pgrep "$RELAYD_MATCH")
+       pid=$(lttng_pgrep "$RELAYD_MATCH")
        ok $? "Found lttng-relayd"
 
        cwd=$(readlink "/proc/${pid}/cwd")
@@ -111,7 +111,7 @@ function test_relayd_background()
 
        start_lttng_relayd_opt 1 "-b" "--working-directory $working_dir"
 
-       pid=$(pgrep "$RELAYD_MATCH")
+       pid=$(lttng_pgrep "$RELAYD_MATCH")
        ok $? "Found lttng-relayd"
 
        cwd=$(readlink "/proc/${pid}/cwd")
@@ -132,7 +132,7 @@ function test_relayd_background_no_working_dir()
 
        start_lttng_relayd_opt 1 "-b" ""
 
-       pid=$(pgrep "$RELAYD_MATCH")
+       pid=$(lttng_pgrep "$RELAYD_MATCH")
        ok $? "Found lttng-relayd"
 
        cwd=$(readlink "/proc/${pid}/cwd")
@@ -172,7 +172,7 @@ function test_relayd_debug_permission()
                ERROR_OUTPUT_DEST=$(mktemp)
                start_lttng_relayd_opt 1 "-b" "-v --working-dir $working_dir"
 
-               pid=$(pgrep "$RELAYD_MATCH")
+               pid=$(lttng_pgrep "$RELAYD_MATCH")
                ok $? "Found lttng-relayd"
 
                cwd=$(readlink "/proc/${pid}/cwd")
@@ -208,7 +208,7 @@ function test_relayd_failure()
        test $? -eq "1"
        ok $? "Expect failure to start lttng-relayd for non-existent working directory"
 
-       pid=$(pgrep "$RELAYD_MATCH")
+       pid=$(lttng_pgrep "$RELAYD_MATCH")
        if [ -z "$pid" ]; then
                pass "No lttng-relayd present"
        else
@@ -237,7 +237,7 @@ function test_relayd_env()
        export LTTNG_RELAYD_WORKING_DIRECTORY=${working_dir}
        start_lttng_relayd_opt 1 "-b" ""
 
-       pid=$(pgrep "$RELAYD_MATCH")
+       pid=$(lttng_pgrep "$RELAYD_MATCH")
        ok $? "Found lttng-relayd"
 
        cwd=$(readlink "/proc/$pid/cwd")
@@ -264,7 +264,7 @@ function test_relayd_cmdline_overwrite_env()
        export LTTNG_RELAYD_WORKING_DIRECTORY=${working_dir_env}
        start_lttng_relayd_opt 1 "-b" "--working-dir ${working_dir_cmdline}"
 
-       pid=$(pgrep "$RELAYD_MATCH")
+       pid=$(lttng_pgrep "$RELAYD_MATCH")
        ok $? "Found lttng-relayd"
 
        cwd=$(readlink "/proc/$pid/cwd")
index 4d4a78241c5ddf5445fab74f2da6e50ad16665d9..4aaf1d91a43a0fe01bac3e927400947972017504 100755 (executable)
@@ -36,7 +36,7 @@ function enable_channel_per_uid()
 
 function check_sessiond()
 {
-       if [ -z "$(pgrep --full lt-lttng-sessiond)" ]; then
+       if [ -z "$(lttng_pgrep lt-lttng-sessiond)" ]; then
                local str_date=$(date +%H%M%S-%d%m%Y)
 
                diag "!!!The session daemon died unexpectedly!!!"
@@ -56,7 +56,7 @@ function start_sessiond()
                BAIL_OUT "*** Kernel too old for session daemon tests ***"
        fi
 
-       if [ -z $(pgrep --full lt-$SESSIOND_BIN) ]; then
+       if [ -z $(lttng_pgrep lt-$SESSIOND_BIN) ]; then
                # We have to start it like this so the ulimit -c is used by this
                # process. Also, we collect any error message printed out.
                $TESTDIR/../src/bin/lttng-sessiond/$SESSIOND_BIN --quiet --background --consumerd32-path="$TESTDIR/../src/bin/lttng-consumerd/lttng-consumerd" --consumerd64-path="$TESTDIR/../src/bin/lttng-consumerd/lttng-consumerd" >$LOG_FILE 2>&1
index f8a0a4755d2723756a20e0ec4aa9a87ac7b995ba..a0f1898b68e82fc44f2942e9ed060ae3fac65203 100755 (executable)
@@ -37,7 +37,7 @@ function enable_channel_per_uid()
 
 function check_sessiond()
 {
-       if [ -z "$(pgrep --full lt-lttng-sessiond)" ]; then
+       if [ -z "$(lttng_pgrep lt-lttng-sessiond)" ]; then
                local str_date=$(date +%H%M%S-%d%m%Y)
 
                diag "!!!The session daemon died unexpectedly!!!"
@@ -51,7 +51,7 @@ function check_sessiond()
 
 function check_relayd()
 {
-       if [ -z "$(pgrep --full lt-lttng-relayd)" ]; then
+       if [ -z "$(lttng_pgrep lt-lttng-relayd)" ]; then
                local str_date=$(date +%H%M%S-%d%m%Y)
 
                diag "!!!The relay daemon died unexpectedly!!!"
@@ -71,7 +71,7 @@ function start_sessiond()
                BAIL_OUT "*** Kernel too old for session daemon tests ***"
        fi
 
-       if [ -z $(pgrep --full lt-$SESSIOND_BIN) ]; then
+       if [ -z $(lttng_pgrep lt-$SESSIOND_BIN) ]; then
                # We have to start it like this so the ulimit -c is used by this
                # process. Also, we collect any error message printed out.
                $TESTDIR/../src/bin/lttng-sessiond/$SESSIOND_BIN --quiet --background --consumerd32-path="$TESTDIR/../src/bin/lttng-consumerd/lttng-consumerd" --consumerd64-path="$TESTDIR/../src/bin/lttng-consumerd/lttng-consumerd" >$LOG_FILE_SESSIOND 2>&1
@@ -84,7 +84,7 @@ function start_relayd
 {
        local opt=$1
 
-       if [ -z $(pgrep --full lt-$RELAYD_BIN) ]; then
+       if [ -z $(lttng_pgrep lt-$RELAYD_BIN) ]; then
                $TESTDIR/../src/bin/lttng-relayd/$RELAYD_BIN $opt >$LOG_FILE_RELAYD 2>&1 &
                ok $? "Start lttng-relayd (opt: \"$opt\")"
        fi
index e6ee7df7dd65d9c8804d3b433ef05a0d15116aff..c9a4576b63bbb63a6bef67fb5fcf22c3ff0a534e 100755 (executable)
@@ -38,7 +38,7 @@ function enable_channel_per_uid()
 
 function check_sessiond()
 {
-       if [ -z "$(pgrep --full lt-lttng-sessiond)" ]; then
+       if [ -z "$(lttng_pgrep lt-lttng-sessiond)" ]; then
                local str_date=$(date +%H%M%S-%d%m%Y)
 
                diag "!!!The session daemon died unexpectedly!!!"
@@ -58,7 +58,7 @@ function start_sessiond()
                BAIL_OUT "*** Kernel too old for session daemon tests ***"
        fi
 
-       if [ -z $(pgrep --full lt-$SESSIOND_BIN) ]; then
+       if [ -z $(lttng_pgrep lt-$SESSIOND_BIN) ]; then
                # We have to start it like this so the ulimit -c is used by this
                # process. Also, we collect any error message printed out.
                #$TESTDIR/../src/bin/lttng-sessiond/$SESSIOND_BIN --quiet --background --consumerd32-path="$TESTDIR/../src/bin/lttng-consumerd/lttng-consumerd" --consumerd64-path="$TESTDIR/../src/bin/lttng-consumerd/lttng-consumerd" >$LOG_FILE_SESSIOND 2>&1
@@ -73,7 +73,7 @@ function start_relayd
 {
        local opt=$1
 
-       if [ -z $(pgrep --full lt-$RELAYD_BIN) ]; then
+       if [ -z $(lttng_pgrep lt-$RELAYD_BIN) ]; then
                $TESTDIR/../src/bin/lttng-relayd/$RELAYD_BIN $opt >$LOG_FILE_RELAYD 2>&1 &
                ok $? "Start lttng-relayd (opt: \"$opt\")"
        fi
@@ -81,7 +81,7 @@ function start_relayd
 
 function check_relayd()
 {
-       if [ -z "$(pgrep --full lt-lttng-relayd)" ]; then
+       if [ -z "$(lttng_pgrep lt-lttng-relayd)" ]; then
                local str_date=$(date +%H%M%S-%d%m%Y)
 
                #diag "Relay daemon died. Starting it again"
index d65662e4265a8fc05c429549d8097c231bd39ba3..1a6ea3c424016e4ecca9fa76f1726a168d39839d 100644 (file)
@@ -74,6 +74,42 @@ trap full_cleanup SIGINT SIGTERM
 
 trap null_pipes SIGPIPE
 
+# Check pgrep from env, default to pgrep if none
+if [ -z "$PGREP" ]; then
+       PGREP=pgrep
+fi
+
+# Due to the renaming of threads we need to use the full command (pgrep -f) to
+# identify the pids for multiple lttng related processes. The problem with "pgrep
+# -f" is that it ends up also looking at the arguments. We use a two stage
+# lookup. The first one is using "pgrep -f" yielding potential candidate.
+# The second on perform grep on the basename of the first field of the
+# /proc/pid/cmdline of the previously identified pids. The first field
+# correspond to the actual command.
+function lttng_pgrep ()
+{
+       local pattern=$1
+       local possible_pids
+       local full_command_no_argument
+       local command_basename
+
+       possible_pids=$($PGREP -f "$pattern")
+       if [ -z "$possible_pids" ]; then
+               return 0
+       fi
+
+       while IFS= read -r pid ; do
+               # /proc/pid/cmdline is null separated.
+               if full_command_no_argument=$(cut -d '' -f 1 < /proc/"$pid"/cmdline); then
+                       command_basename=$(basename "$full_command_no_argument")
+                       if grep -q "$pattern" <<< "$command_basename"; then
+                               echo "$pid"
+                       fi
+               fi
+       done <<< "$possible_pids"
+       return 0
+}
+
 function print_ok ()
 {
        # Check if we are a terminal
@@ -401,7 +437,7 @@ function start_lttng_relayd_opt()
 
        DIR=$(readlink -f "$TESTDIR")
 
-       if [ -z $(pgrep -f $RELAYD_MATCH) ]; then
+       if [ -z $(lttng_pgrep "$RELAYD_MATCH") ]; then
                # shellcheck disable=SC2086
                $DIR/../src/bin/lttng-relayd/$RELAYD_BIN $process_mode $opt 1> $OUTPUT_DEST 2> $ERROR_OUTPUT_DEST
                #$DIR/../src/bin/lttng-relayd/$RELAYD_BIN $opt -vvv >>/tmp/relayd.log 2>&1 &
@@ -450,7 +486,7 @@ function stop_lttng_relayd_opt()
        local retval=0
        local pids=
 
-       pids=$(pgrep -f "$RELAYD_MATCH")
+       pids=$(lttng_pgrep "$RELAYD_MATCH")
        if [ -z "$pids" ]; then
                if [ "$withtap" -eq "1" ]; then
                        pass "No relay daemon to kill"
@@ -469,7 +505,7 @@ function stop_lttng_relayd_opt()
        else
                out=1
                while [ -n "$out" ]; do
-                       out=$(pgrep -f "$RELAYD_MATCH")
+                       out=$(lttng_pgrep "$RELAYD_MATCH")
                        if [ -n "$dtimeleft_s" ]; then
                                if [ $dtimeleft_s -lt 0 ]; then
                                        out=
@@ -547,7 +583,7 @@ function start_lttng_sessiond_opt()
        : "${LTTNG_SESSION_CONFIG_XSD_PATH="${DIR}/../src/common/config/"}"
        export LTTNG_SESSION_CONFIG_XSD_PATH
 
-       if [ -z "$(pgrep -f "${SESSIOND_MATCH}")" ]; then
+       if [ -z "$(lttng_pgrep "${SESSIOND_MATCH}")" ]; then
                # Have a load path ?
                if [ -n "$load_path" ]; then
                        # shellcheck disable=SC2086
@@ -599,10 +635,10 @@ function stop_lttng_sessiond_opt()
        local retval=0
 
        local runas_pids=
-       runas_pids=$(pgrep -f "$RUNAS_MATCH")
+       runas_pids=$(lttng_pgrep "$RUNAS_MATCH")
 
        local pids=
-       pids=$(pgrep -f "$SESSIOND_MATCH")
+       pids=$(lttng_pgrep "$SESSIOND_MATCH")
 
        if [ -n "$runas_pids" ]; then
                pids="$pids $runas_pids"
@@ -626,7 +662,7 @@ function stop_lttng_sessiond_opt()
        else
                out=1
                while [ -n "$out" ]; do
-                       out=$(pgrep -f "${SESSIOND_MATCH}")
+                       out=$(lttng_pgrep "${SESSIOND_MATCH}")
                        if [ -n "$dtimeleft_s" ]; then
                                if [ $dtimeleft_s -lt 0 ]; then
                                        out=
@@ -638,7 +674,7 @@ function stop_lttng_sessiond_opt()
                done
                out=1
                while [ -n "$out" ]; do
-                       out=$(pgrep -f "$CONSUMERD_MATCH")
+                       out=$(lttng_pgrep "$CONSUMERD_MATCH")
                        if [ -n "$dtimeleft_s" ]; then
                                if [ $dtimeleft_s -lt 0 ]; then
                                        out=
@@ -692,7 +728,7 @@ function sigstop_lttng_sessiond_opt()
                return
        fi
 
-       PID_SESSIOND="$(pgrep -f "${SESSIOND_MATCH}") $(pgrep -f "$RUNAS_MATCH")"
+       PID_SESSIOND="$(lttng_pgrep "${SESSIOND_MATCH}") $(lttng_pgrep "$RUNAS_MATCH")"
 
        if [ "$withtap" -eq "1" ]; then
                diag "Sending SIGSTOP to lt-$SESSIOND_BIN and $SESSIOND_BIN pids: $(echo "$PID_SESSIOND" | tr '\n' ' ')"
@@ -706,7 +742,7 @@ function sigstop_lttng_sessiond_opt()
        else
                out=1
                while [ $out -ne 0 ]; do
-                       pid="$(pgrep -f "$SESSIOND_MATCH")"
+                       pid="$(lttng_pgrep "$SESSIOND_MATCH")"
 
                        # Wait until state becomes stopped for session
                        # daemon(s).
@@ -754,7 +790,7 @@ function stop_lttng_consumerd_opt()
 
        local retval=0
 
-       PID_CONSUMERD="$(pgrep -f "$CONSUMERD_MATCH")"
+       PID_CONSUMERD="$(lttng_pgrep "$CONSUMERD_MATCH")"
 
        if [ -z "$PID_CONSUMERD" ]; then
                if [ "$withtap" -eq "1" ]; then
@@ -774,7 +810,7 @@ function stop_lttng_consumerd_opt()
        else
                out=1
                while [ $out -ne 0 ]; do
-                       pid="$(pgrep -f "$CONSUMERD_MATCH")"
+                       pid="$(lttng_pgrep "$CONSUMERD_MATCH")"
 
                        # If consumerds are still present check their status.
                        # A zombie status qualifies the consumerd as *killed*
@@ -821,7 +857,7 @@ function sigstop_lttng_consumerd_opt()
        local withtap=$1
        local signal=SIGSTOP
 
-       PID_CONSUMERD="$(pgrep -f "$CONSUMERD_MATCH")"
+       PID_CONSUMERD="$(lttng_pgrep "$CONSUMERD_MATCH")"
 
        diag "Sending SIGSTOP to $CONSUMERD_BIN pids: $(echo "$PID_CONSUMERD" | tr '\n' ' ')"
 
@@ -837,7 +873,7 @@ function sigstop_lttng_consumerd_opt()
        else
                out=1
                while [ $out -ne 0 ]; do
-                       pid="$(pgrep -f "$CONSUMERD_MATCH")"
+                       pid="$(lttng_pgrep "$CONSUMERD_MATCH")"
 
                        # Wait until state becomes stopped for all
                        # consumers.
This page took 0.045578 seconds and 4 git commands to generate.