c59bab78239f2833c9887e741be7840099d4328f
[lttng-tools.git] / tests / regression / tools / health / test_health.sh
1 # Copyright (C) 2012 Christian Babeux <christian.babeux@efficios.com>
2 # Copyright (C) 2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 #
4 # SPDX-License-Identifier: GPL-2.0-only
5
# Root of the tests tree, three directory levels above CURDIR.
# CURDIR is not assigned in this file; it is expected to be provided by
# whatever script includes this one.
TESTDIR="${CURDIR}/../../.."

# Event and channel names used when a consumer daemon is part of a test.
UST_EVENT_NAME="tp:tptest"
KERNEL_EVENT_NAME="sched_switch"
CHANNEL_NAME="testchan"

# Name of the health check program, looked up in CURDIR.
HEALTH_CHECK_BIN="health_check"

# Number of tests announced in the test plan.
NUM_TESTS=99

# Seconds to wait between the two health check invocations of a test.
SLEEP_TIME=30

# Quoted so that a tests tree living under a path with whitespace can
# still be sourced.
# shellcheck source=/dev/null
source "${TESTDIR}/utils/utils.sh"
15
# Validate the captured output of a health check run and report one
# test result.
#
# Globals:
#   STDOUT_PATH (read) - file holding the health check's standard output
#   STDERR_PATH (read) - file holding the health check's standard error
# Arguments:
#   $1 - error message expected in the standard output (used as a grep
#        pattern)
#   $2 - non-zero when a relay daemon is part of the test (defaults to 0)
# Outputs:
#   Calls pass on success; on failure calls fail and dumps both captured
#   streams through diag. pass, fail and diag are not defined in this
#   file (presumably provided by the sourced test utilities).
function report_errors
{
	local test_thread_error_string="$1"
	local test_relayd="${2:-0}"
	local err_no_relayd_match="Error querying relayd health"
	local out outerr line

	# Check for health errors.
	# Include inability to contact relayd health as an expected
	# error, since this can happen whenever the relayd shuts down
	# due to an error in any thread.
	#
	# grep -c prints the count but exits non-zero when it is 0, and
	# prints nothing when the file cannot be read; normalize both
	# cases to a numeric 0.
	out=$(grep -c -- "${test_thread_error_string}" "${STDOUT_PATH}") || out=${out:-0}
	if [ "${test_relayd}" -ne 0 ]; then
		outerr=$(grep -c -- "${err_no_relayd_match}" "${STDERR_PATH}") || outerr=${outerr:-0}
	else
		outerr=0
	fi

	if [ "${out}" -eq 0 ] && [ "${outerr}" -eq 0 ]; then
		fail "Validation failure"
		diag "Health returned:"

		# IFS= and -r keep leading whitespace and backslashes intact;
		# the -n test keeps a last line that lacks a trailing newline.
		diag "stdout:"
		while IFS= read -r line || [ -n "${line}" ]; do
			diag "${line}"
		done < "${STDOUT_PATH}"

		diag "stderr:"
		while IFS= read -r line || [ -n "${line}" ]; do
			diag "${line}"
		done < "${STDERR_PATH}"
	else
		pass "Validation OK"
	fi
}
50
# Run one health-check scenario against a single thread testpoint.
#
# Starts the session daemon with the testpoint preload library and the
# thread-specific testpoint variable exported, optionally sets up
# tracing and a relay daemon, runs the health check twice (SLEEP_TIME
# seconds apart), validates the second run with report_errors, then
# stops the daemons and removes the exported variables.
#
# Globals read:
#   CURDIR, HEALTH_CHECK_BIN, SESSIOND_PRELOAD, HEALTH_PATH, TRACE_PATH,
#   STDOUT_PATH, STDERR_PATH, SLEEP_TIME, SESSION_NAME, UST_EVENT_NAME,
#   KERNEL_EVENT_NAME, CHANNEL_NAME, KILL_SIGNAL, isroot
# Globals written:
#   RELAYD_ARGS
# Arguments:
#   $1 - testpoint suffix; "<$2>_<$1>" is the variable exported as 1
#   $2 - thread testpoint name
#   $3 - error message expected from the health check
#   $4 - 1 when validating the result requires root
#   $5 - 1 to set up a tracing session (and stop consumerd afterwards)
#   $6 - 1 to start a relay daemon and query its health too
function test_health
{
	local test_suffix="$1"
	local test_thread_name="$2"
	local test_thread_error_string="$3"
	local test_needs_root="$4"
	local test_consumerd="$5"
	local test_relayd="$6"
	local testpoint_var="${test_thread_name}_${test_suffix}"
	# Command line of the health check, built as an array so that
	# paths containing whitespace stay single arguments.
	local -a health_check_cmd=("${CURDIR}/${HEALTH_CHECK_BIN}")

	diag "Test health problem detection with ${test_thread_name}"

	# Set the socket timeout to 5 so the health check detection
	# happens within 25 s
	export LTTNG_NETWORK_SOCKET_TIMEOUT=5
	export LTTNG_RELAYD_HEALTH="${HEALTH_PATH}/test-health"

	# Activate testpoints
	export LTTNG_TESTPOINT_ENABLE=1

	# Activate specific thread test
	export "${testpoint_var}=1"

	# Spawn sessiond with preloaded testpoint override lib
	export LD_PRELOAD="${CURDIR}/${SESSIOND_PRELOAD}"

	diag "Start session daemon"
	start_lttng_sessiond

	if [ "${test_consumerd}" -eq 1 ]; then
		create_lttng_session_no_output "${SESSION_NAME}"

		diag "With UST consumer daemons"
		enable_ust_lttng_event_ok "${SESSION_NAME}" "${UST_EVENT_NAME}" "${CHANNEL_NAME}"

		# The braces run only when skip returns non-zero
		# (presumably meaning nothing was skipped - confirm
		# against the skip helper).
		skip "${isroot}" "Root access is needed. Skipping kernel consumer health check test." "1" ||
		{
			diag "With kernel consumer daemon"
			lttng_enable_kernel_event "${SESSION_NAME}" "${KERNEL_EVENT_NAME}" "${CHANNEL_NAME}"
		}
		start_lttng_tracing_ok "${SESSION_NAME}"
	fi

	if [ "${test_relayd}" -eq 1 ]; then
		diag "With relay daemon"
		RELAYD_ARGS="--relayd-path=${LTTNG_RELAYD_HEALTH}"
		health_check_cmd+=("${RELAYD_ARGS}")

		start_lttng_relayd "-o $TRACE_PATH"
	else
		RELAYD_ARGS=
	fi

	# Check health status, not caring about result
	"${health_check_cmd[@]}" > /dev/null

	# Wait
	diag "Check after running for ${SLEEP_TIME} seconds"
	sleep "${SLEEP_TIME}"

	# Check health status
	"${health_check_cmd[@]}" > "${STDOUT_PATH}" 2> "${STDERR_PATH}"

	if [ "${test_needs_root}" -eq 1 ]; then
		skip "${isroot}" "Root access needed for test \"${test_thread_name}\"." "1" ||
		{
			report_errors "${test_thread_error_string}" "${test_relayd}"
		}
	else
		report_errors "${test_thread_error_string}" "${test_relayd}"
	fi

	# KILL_SIGNAL is deliberately left unquoted below: when it is
	# unset or empty, no argument at all is passed to the helpers.
	if [ "${test_relayd}" -eq 1 ]; then
		# We may fail to stop relayd here, and this is OK, since
		# it may have been killed voluntarily by testpoint.
		# shellcheck disable=SC2086
		stop_lttng_relayd_cleanup $KILL_SIGNAL
	fi

	if [ "${test_consumerd}" -eq 1 ]; then
		# shellcheck disable=SC2086
		stop_lttng_consumerd $KILL_SIGNAL
	fi
	# shellcheck disable=SC2086
	stop_lttng_sessiond $KILL_SIGNAL

	unset LTTNG_TESTPOINT_ENABLE
	unset "${testpoint_var}"
	unset LD_PRELOAD
	unset LTTNG_NETWORK_SOCKET_TIMEOUT
	unset LTTNG_RELAYD_HEALTH
}
141
# Announce the number of planned tests, then print the banner.
plan_tests $NUM_TESTS

print_test_banner "$TEST_DESC"

# foundobj is 1 only when the testpoint preload object exists as a
# regular file next to this test; it defaults to 0.
foundobj=0
if [ -f "$CURDIR/$SESSIOND_PRELOAD" ]; then
	foundobj=1
fi

# Stop here, successfully, when skip returns success (presumably
# meaning all NUM_TESTS tests were skipped - confirm against the skip
# helper).
if skip $foundobj "No shared object generated. Skipping all tests." $NUM_TESTS; then
	exit 0
fi
153
# Thread testpoint names, one per test. The driver loop passes entry i
# to test_health together with entry i of ERROR_STRING, NEEDS_ROOT,
# TEST_CONSUMERD and TEST_RELAYD, so all five arrays must stay aligned.
THREAD=(
	# Session daemon threads
	LTTNG_SESSIOND_THREAD_MANAGE_CLIENTS
	LTTNG_SESSIOND_THREAD_MANAGE_APPS
	LTTNG_SESSIOND_THREAD_REG_APPS
	LTTNG_SESSIOND_THREAD_HT_CLEANUP
	LTTNG_SESSIOND_THREAD_APP_MANAGE_NOTIFY
	LTTNG_SESSIOND_THREAD_APP_REG_DISPATCH
	LTTNG_SESSIOND_THREAD_MANAGE_KERNEL

	# Consumer daemon threads
	LTTNG_CONSUMERD_THREAD_CHANNEL
	LTTNG_CONSUMERD_THREAD_METADATA
	LTTNG_CONSUMERD_THREAD_METADATA_TIMER

	# Relay daemon threads
	LTTNG_RELAYD_THREAD_DISPATCHER
	LTTNG_RELAYD_THREAD_WORKER
	LTTNG_RELAYD_THREAD_LISTENER
	LTTNG_RELAYD_THREAD_LIVE_DISPATCHER
	LTTNG_RELAYD_THREAD_LIVE_WORKER
	LTTNG_RELAYD_THREAD_LIVE_LISTENER
)
173
# Message expected from the health check for each entry of THREAD
# (same index). Single quotes are used so the embedded double quotes
# need no escaping.
ERROR_STRING=(
	# Session daemon threads
	'Thread "Session daemon command" is not responding in component "sessiond".'
	'Thread "Session daemon application manager" is not responding in component "sessiond".'
	'Thread "Session daemon application registration" is not responding in component "sessiond".'
	'Thread "Session daemon hash table cleanup" is not responding in component "sessiond".'
	'Thread "Session daemon application notification manager" is not responding in component "sessiond".'
	'Thread "Session daemon application registration dispatcher" is not responding in component "sessiond".'
	'Thread "Session daemon kernel" is not responding in component "sessiond".'

	# Consumer daemon threads
	'Thread "Consumer daemon channel" is not responding'
	'Thread "Consumer daemon metadata" is not responding'
	'Thread "Consumer daemon metadata timer" is not responding'

	# Relay daemon threads
	'Thread "Relay daemon dispatcher" is not responding in component "relayd".'
	'Thread "Relay daemon worker" is not responding in component "relayd".'
	'Thread "Relay daemon listener" is not responding in component "relayd".'
	'Thread "Relay daemon live dispatcher" is not responding in component "relayd".'
	'Thread "Relay daemon live worker" is not responding in component "relayd".'
	'Thread "Relay daemon live listener" is not responding in component "relayd".'
)
194
195 # TODO
196 # "LTTNG_SESSIOND_THREAD_MANAGE_CONSUMER"
197 # "Thread \"Session daemon manage consumer\" is not responding in component \"sessiond\"."
198
199 # TODO: test kernel consumerd specifically in addition to UST consumerd
200
201 # TODO: need refactoring of consumerd teardown
202 # "LTTNG_CONSUMERD_THREAD_SESSIOND"
203 # "Thread \"Consumer daemon session daemon command manager\" is not responding"
204
# TODO: this thread is responsible for closing a file descriptor that
# triggers the teardown of the metadata thread. We should revisit the
# teardown of consumerd.
208 # "LTTNG_CONSUMERD_THREAD_DATA"
209 # "Thread \"Consumer daemon data\" is not responding"
210
# Per-thread flag (same index as THREAD): when 1, test_health validates
# the result only if running as root and skips the check otherwise.
NEEDS_ROOT=(
	0 0 0 0 0 0 1	# session daemon threads
	0 0 0		# consumer daemon threads
	0 0 0 0 0 0	# relay daemon threads
)
231
# Per-thread flag (same index as THREAD): when 1, test_health creates a
# session, enables events and starts tracing before checking health,
# and stops the consumer daemon afterwards.
TEST_CONSUMERD=(
	0 0 0 0 0 0 0	# session daemon threads
	1 1 1		# consumer daemon threads
	1 1 1 1 1 1	# relay daemon threads
)
252
# Per-thread flag (same index as THREAD): when 1, test_health starts a
# relay daemon and passes its health socket path to the health check.
TEST_RELAYD=(
	0 0 0 0 0 0 0	# session daemon threads
	0 0 0		# consumer daemon threads
	1 1 1 1 1 1	# relay daemon threads
)
273
# Scratch locations shared by every test: captured health check output,
# the relay daemon output directory and the directory holding the relay
# daemon health socket.
STDOUT_PATH=$(mktemp)
STDERR_PATH=$(mktemp)
TRACE_PATH=$(mktemp -d)
HEALTH_PATH=$(mktemp -d)

# isroot is 1 when running with user id 0, 0 otherwise.
if [ "$(id -u)" = "0" ]; then
	isroot=1
else
	isroot=0
fi

# Run one health test per THREAD entry, taking the matching entry from
# each of the parallel arrays.
THREAD_COUNT=${#THREAD[@]}
for ((i = 0; i < THREAD_COUNT; i++)); do
	test_health "${TEST_SUFFIX}" \
		"${THREAD[$i]}" \
		"${ERROR_STRING[$i]}" \
		"${NEEDS_ROOT[$i]}" \
		"${TEST_CONSUMERD[$i]}" \
		"${TEST_RELAYD[$i]}"
done

# Quoted so that a temporary directory whose path contains whitespace
# is removed as a single path instead of being split into several
# unrelated rm targets.
rm -rf -- "${HEALTH_PATH}"
rm -rf -- "${TRACE_PATH}"
rm -f -- "${STDOUT_PATH}"
rm -f -- "${STDERR_PATH}"
This page took 0.034297 seconds and 3 git commands to generate.