health check tests: test relayd and consumerd
[lttng-tools.git] / tests / regression / tools / health / test_health.sh
diff --git a/tests/regression/tools/health/test_health.sh b/tests/regression/tools/health/test_health.sh
new file mode 100644 (file)
index 0000000..6ae8885
--- /dev/null
@@ -0,0 +1,310 @@
+# Copyright (C) - 2012 Christian Babeux <christian.babeux@efficios.com>
+# Copyright (C) - 2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License, version 2 only, as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+TESTDIR=${CURDIR}/../../..
+LTTNG_BIN="lttng"
+UST_EVENT_NAME="tp:tptest"
+KERNEL_EVENT_NAME="sched_switch"
+CHANNEL_NAME="testchan"
+HEALTH_CHECK_BIN="health_check"
+NUM_TESTS=90
+SLEEP_TIME=30
+
+source $TESTDIR/utils/utils.sh
+
+if [ ! -f "$CURDIR/$SESSIOND_PRELOAD" ]; then
+       BAIL_OUT "${CURDIR}/${SESSIOND_PRELOAD} is missing."
+fi
+
+function lttng_create_session_uri
+{
+       # Create session with default path
+       $TESTDIR/../src/bin/lttng/$LTTNG_BIN create $SESSION_NAME \
+               -U net://localhost >/dev/null 2>&1
+       ok $? "Create session with default path"
+}
+
+function report_errors
+{
+       test_thread_error_string="$1"
+       test_relayd="$2"
+       err_no_relayd_match="Error querying relayd health"
+
+       # Check for health errors
+       # Include inability to contact relayd health as an expected
+       # error, since this can happen whenever the relayd shutdown due
+       # to an error in any thread.
+       out=$(grep "${test_thread_error_string}" ${STDOUT_PATH} | wc -l)
+       if [ $test_relayd -ne 0 ]; then
+               outerr=$(grep "${err_no_relayd_match}" ${STDERR_PATH} | wc -l)
+       else
+               outerr=0
+       fi
+       if [ $out -eq 0 ] && [ $outerr -eq 0 ]; then
+               fail "Validation failure"
+               diag "Health returned:"
+               diag "stdout:"
+               file=${STDOUT_PATH}
+               while read line ; do
+                   diag "$line"
+               done < ${file}
+
+               diag "stderr:"
+               file=${STDERR_PATH}
+               while read line ; do
+                   diag "$line"
+               done < ${file}
+       else
+               pass "Validation OK"
+       fi
+}
+
+function test_health
+{
+       test_suffix="$1"
+       test_thread_name="$2"
+       test_thread_error_string="$3"
+       test_needs_root="$4"
+       test_consumerd="$5"
+       test_relayd="$6"
+
+       diag "Test health problem detection with ${test_thread_name}"
+
+       # Set the socket timeout to 5 so the health check detection
+       # happens within 25 s
+       export LTTNG_NETWORK_SOCKET_TIMEOUT=5
+       export LTTNG_RELAYD_HEALTH="${HEALTH_PATH}/test-health"
+
+       # Activate testpoints
+       export LTTNG_TESTPOINT_ENABLE=1
+
+       # Activate specific thread test
+       export ${test_thread_name}_${test_suffix}=1
+
+       # Spawn sessiond with preload healthexit lib
+       export LD_PRELOAD="$CURDIR/$SESSIOND_PRELOAD"
+
+       diag "Start session daemon"
+       start_lttng_sessiond
+
+       if [ ${test_consumerd} -eq 1 ]; then
+               create_lttng_session_no_output $SESSION_NAME
+
+               diag "With UST consumer daemons"
+               enable_ust_lttng_event $SESSION_NAME $UST_EVENT_NAME $CHANNEL_NAME
+
+               skip $isroot "Root access is needed. Skipping kernel consumer health check test." "1" ||
+               {
+                       diag "With kernel consumer daemon"
+                       lttng_enable_kernel_event $SESSION_NAME $KERNEL_EVENT_NAME $CHANNEL_NAME
+               }
+               start_lttng_tracing $SESSION_NAME
+       fi
+
+       if [ ${test_relayd} -eq 1 ]; then
+               diag "With relay daemon"
+               RELAYD_ARGS="--relayd-path=${LTTNG_RELAYD_HEALTH}"
+
+               start_lttng_relayd "-o $TRACE_PATH"
+       else
+               RELAYD_ARGS=
+       fi
+
+       # Check health status, not caring about result
+       $CURDIR/$HEALTH_CHECK_BIN ${RELAYD_ARGS} \
+               > /dev/null
+
+       # Wait
+       diag "Check after running for ${SLEEP_TIME} seconds"
+       sleep ${SLEEP_TIME}
+
+       # Check health status
+       $CURDIR/$HEALTH_CHECK_BIN ${RELAYD_ARGS} \
+               > ${STDOUT_PATH} 2> ${STDERR_PATH}
+
+
+       if [ ${test_needs_root} -eq 1 ]; then
+               skip ${isroot} "Root access needed for test \"${test_thread_name}\"." "1" ||
+               {
+                       report_errors "${test_thread_error_string}" "${test_relayd}"
+               }
+       else
+               report_errors "${test_thread_error_string}" "${test_relayd}"
+       fi
+
+       if [ ${test_relayd} -eq 1 ]; then
+               stop_lttng_relayd_nocheck
+       fi
+       stop_lttng_sessiond
+
+       unset LTTNG_TESTPOINT_ENABLE
+       unset ${test_thread_name}_${test_suffix}
+       unset LD_PRELOAD
+       unset LTTNG_NETWORK_SOCKET_TIMEOUT
+       unset LTTNG_RELAYD_HEALTH
+}
+
+plan_tests $NUM_TESTS
+
+print_test_banner "$TEST_DESC"
+
+THREAD=("LTTNG_SESSIOND_THREAD_MANAGE_CLIENTS"
+       "LTTNG_SESSIOND_THREAD_MANAGE_APPS"
+       "LTTNG_SESSIOND_THREAD_REG_APPS"
+       "LTTNG_SESSIOND_THREAD_HT_CLEANUP"
+       "LTTNG_SESSIOND_THREAD_APP_MANAGE_NOTIFY"
+       "LTTNG_SESSIOND_THREAD_APP_REG_DISPATCH"
+       "LTTNG_SESSIOND_THREAD_MANAGE_KERNEL"
+
+       "LTTNG_CONSUMERD_THREAD_CHANNEL"
+       "LTTNG_CONSUMERD_THREAD_METADATA"
+       "LTTNG_CONSUMERD_THREAD_METADATA_TIMER"
+
+       "LTTNG_RELAYD_THREAD_DISPATCHER"
+       "LTTNG_RELAYD_THREAD_WORKER"
+       "LTTNG_RELAYD_THREAD_LISTENER"
+       "LTTNG_RELAYD_THREAD_LIVE_DISPATCHER"
+       "LTTNG_RELAYD_THREAD_LIVE_WORKER"
+       "LTTNG_RELAYD_THREAD_LIVE_LISTENER"
+)
+
+ERROR_STRING=(
+       "Thread \"Session daemon command\" is not responding in component \"sessiond\"."
+       "Thread \"Session daemon application manager\" is not responding in component \"sessiond\"."
+       "Thread \"Session daemon application registration\" is not responding in component \"sessiond\"."
+       "Thread \"Session daemon hash table cleanup\" is not responding in component \"sessiond\"."
+       "Thread \"Session daemon application notification manager\" is not responding in component \"sessiond\"."
+       "Thread \"Session daemon application registration dispatcher\" is not responding in component \"sessiond\"."
+       "Thread \"Session daemon kernel\" is not responding in component \"sessiond\"."
+
+       "Thread \"Consumer daemon channel\" is not responding"
+       "Thread \"Consumer daemon metadata\" is not responding"
+       "Thread \"Consumer daemon metadata timer\" is not responding"
+
+       "Thread \"Relay daemon dispatcher\" is not responding in component \"relayd\"."
+       "Thread \"Relay daemon worker\" is not responding in component \"relayd\"."
+       "Thread \"Relay daemon listener\" is not responding in component \"relayd\"."
+       "Thread \"Relay daemon live dispatcher\" is not responding in component \"relayd\"."
+       "Thread \"Relay daemon live worker\" is not responding in component \"relayd\"."
+       "Thread \"Relay daemon live listener\" is not responding in component \"relayd\"."
+)
+
+# TODO
+# "LTTNG_SESSIOND_THREAD_MANAGE_CONSUMER"
+# "Thread \"Session daemon manage consumer\" is not responding in component \"sessiond\"."
+
+# TODO: test kernel consumerd specifically in addition to UST consumerd
+
+# TODO: need refactoring of consumerd teardown
+# "LTTNG_CONSUMERD_THREAD_SESSIOND"
+# "Thread \"Consumer daemon session daemon command manager\" is not responding"
+
+# TODO: this thread is responsible for close a file descriptor that
+# triggers teardown of metadata thread. We should revisit teardown of
+# consumerd.
+# "LTTNG_CONSUMERD_THREAD_DATA"
+# "Thread \"Consumer daemon data\" is not responding"
+
+NEEDS_ROOT=(
+       0
+       0
+       0
+       0
+       0
+       0
+       1
+
+       0
+       0
+       0
+
+       0
+       0
+       0
+       0
+       0
+       0
+)
+
+TEST_CONSUMERD=(
+       0
+       0
+       0
+       0
+       0
+       0
+       0
+
+       1
+       1
+       1
+
+       1
+       1
+       1
+       1
+       1
+       1
+)
+
+TEST_RELAYD=(
+       0
+       0
+       0
+       0
+       0
+       0
+       0
+
+       0
+       0
+       0
+
+       1
+       1
+       1
+       1
+       1
+       1
+)
+
+STDOUT_PATH=$(mktemp)
+STDERR_PATH=$(mktemp)
+TRACE_PATH=$(mktemp -d)
+HEALTH_PATH=$(mktemp -d)
+
+if [ "$(id -u)" == "0" ]; then
+       isroot=1
+else
+       isroot=0
+fi
+
+THREAD_COUNT=${#THREAD[@]}
+i=0
+while [ "$i" -lt "$THREAD_COUNT" ]; do
+       test_health "${TEST_SUFFIX}" \
+               "${THREAD[$i]}" \
+               "${ERROR_STRING[$i]}" \
+               "${NEEDS_ROOT[$i]}" \
+               "${TEST_CONSUMERD[$i]}" \
+               "${TEST_RELAYD[$i]}"
+       let "i++"
+done
+
+rm -rf ${HEALTH_PATH}
+rm -rf ${TRACE_PATH}
+rm -f ${STDOUT_PATH}
+rm -f ${STDERR_PATH}
This page took 0.025491 seconds and 4 git commands to generate.