40ca2bd12a6aa41dd72caf6ba4f2f7aca8c0dd68
[lttng-tools.git] / src / bin / lttng-sessiond / action-executor.c
1 /*
2 * Copyright (C) 2020 Jérémie Galarneau <jeremie.galarneau@efficios.com>
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 *
6 */
7
8 #include "action-executor.h"
9 #include "cmd.h"
10 #include "health-sessiond.h"
11 #include "lttng-sessiond.h"
12 #include "notification-thread-internal.h"
13 #include "session.h"
14 #include "thread.h"
15 #include <common/macros.h>
16 #include <common/optional.h>
17 #include <lttng/action/action-internal.h>
18 #include <lttng/action/group.h>
19 #include <lttng/action/notify.h>
20 #include <lttng/action/rotate-session.h>
21 #include <lttng/action/snapshot-session.h>
22 #include <lttng/action/start-session.h>
23 #include <lttng/action/stop-session.h>
24 #include <lttng/condition/evaluation.h>
25 #include <lttng/lttng-error.h>
26 #include <lttng/trigger/trigger-internal.h>
27 #include <pthread.h>
28 #include <stdbool.h>
29 #include <stddef.h>
30 #include <urcu/list.h>
31
/* Name given to the executor's worker thread when it is created. */
#define THREAD_NAME "Action Executor"
/* Queue depth at which action_executor_enqueue() starts reporting overflows. */
#define MAX_QUEUED_WORK_COUNT 8192
34
35 struct action_work_item {
36 uint64_t id;
37 struct lttng_trigger *trigger;
38 struct lttng_evaluation *evaluation;
39 struct notification_client_list *client_list;
40 LTTNG_OPTIONAL(struct lttng_credentials) object_creds;
41 struct cds_list_head list_node;
42 };
43
44 struct action_executor {
45 struct lttng_thread *thread;
46 struct notification_thread_handle *notification_thread_handle;
47 struct {
48 uint64_t pending_count;
49 struct cds_list_head list;
50 pthread_cond_t cond;
51 pthread_mutex_t lock;
52 } work;
53 bool should_quit;
54 uint64_t next_work_item_id;
55 };
56
/*
 * Signature shared by every per-action-type handler.
 *
 * A non-zero return value is treated as a fatal error by the executor
 * thread, which then leaves its work loop.
 */
typedef int (*action_executor_handler)(struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
60
/*
 * Forward declarations of the handlers: they are referenced by the
 * action_executors dispatch table before being defined.
 */
static int action_executor_notify_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_start_session_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_stop_session_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_rotate_session_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_snapshot_session_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_group_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action_group);
static int action_executor_generic_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
82
83 static const action_executor_handler action_executors[] = {
84 [LTTNG_ACTION_TYPE_NOTIFY] = action_executor_notify_handler,
85 [LTTNG_ACTION_TYPE_START_SESSION] = action_executor_start_session_handler,
86 [LTTNG_ACTION_TYPE_STOP_SESSION] = action_executor_stop_session_handler,
87 [LTTNG_ACTION_TYPE_ROTATE_SESSION] = action_executor_rotate_session_handler,
88 [LTTNG_ACTION_TYPE_SNAPSHOT_SESSION] = action_executor_snapshot_session_handler,
89 [LTTNG_ACTION_TYPE_GROUP] = action_executor_group_handler,
90 };
91
92 static const char *action_type_names[] = {
93 [LTTNG_ACTION_TYPE_NOTIFY] = "Notify",
94 [LTTNG_ACTION_TYPE_START_SESSION] = "Start session",
95 [LTTNG_ACTION_TYPE_STOP_SESSION] = "Stop session",
96 [LTTNG_ACTION_TYPE_ROTATE_SESSION] = "Rotate session",
97 [LTTNG_ACTION_TYPE_SNAPSHOT_SESSION] = "Snapshot session",
98 [LTTNG_ACTION_TYPE_GROUP] = "Group",
99 };
100
101 static const char *get_action_name(const struct lttng_action *action)
102 {
103 return action_type_names[lttng_action_get_type_const(action)];
104 }
105
106 static int client_handle_transmission_status(
107 struct notification_client *client,
108 enum client_transmission_status status,
109 void *user_data)
110 {
111 int ret = 0;
112 struct action_executor *executor = user_data;
113 bool update_communication = true;
114
115 ASSERT_LOCKED(client->lock);
116
117 switch (status) {
118 case CLIENT_TRANSMISSION_STATUS_COMPLETE:
119 DBG("Successfully sent full notification to client, client_id = %" PRIu64,
120 client->id);
121 update_communication = false;
122 break;
123 case CLIENT_TRANSMISSION_STATUS_QUEUED:
124 DBG("Queued notification in client outgoing buffer, client_id = %" PRIu64,
125 client->id);
126 break;
127 case CLIENT_TRANSMISSION_STATUS_FAIL:
128 DBG("Communication error occurred while sending notification to client, client_id = %" PRIu64,
129 client->id);
130 client->communication.active = false;
131 break;
132 default:
133 ERR("Fatal error encoutered while sending notification to client, client_id = %" PRIu64,
134 client->id);
135 client->communication.active = false;
136 ret = -1;
137 goto end;
138 }
139
140 if (!update_communication) {
141 goto end;
142 }
143
144 ret = notification_thread_client_communication_update(
145 executor->notification_thread_handle, client->id,
146 status);
147 end:
148 return ret;
149 }
150
151 static int action_executor_notify_handler(struct action_executor *executor,
152 const struct action_work_item *work_item,
153 const struct lttng_action *action)
154 {
155 return notification_client_list_send_evaluation(work_item->client_list,
156 lttng_trigger_get_const_condition(work_item->trigger),
157 work_item->evaluation,
158 lttng_trigger_get_credentials(work_item->trigger),
159 LTTNG_OPTIONAL_GET_PTR(work_item->object_creds),
160 client_handle_transmission_status,
161 executor);
162 }
163
164 static int action_executor_start_session_handler(struct action_executor *executor,
165 const struct action_work_item *work_item,
166 const struct lttng_action *action)
167 {
168 int ret = 0;
169 const char *session_name;
170 enum lttng_action_status action_status;
171 struct ltt_session *session;
172
173 action_status = lttng_action_start_session_get_session_name(
174 action, &session_name);
175 if (action_status != LTTNG_ACTION_STATUS_OK) {
176 ERR("Failed to get session name from \"%s\" action",
177 get_action_name(action));
178 ret = -1;
179 goto end;
180 }
181
182 session_lock_list();
183 session = session_find_by_name(session_name);
184 if (session) {
185 enum lttng_error_code cmd_ret;
186
187 session_lock(session);
188 cmd_ret = cmd_start_trace(session);
189 session_unlock(session);
190
191 switch (cmd_ret) {
192 case LTTNG_OK:
193 DBG("Successfully started session \"%s\" on behalf of trigger \"%p\"",
194 session_name,
195 work_item->trigger);
196 break;
197 case LTTNG_ERR_TRACE_ALREADY_STARTED:
198 DBG("Attempted to start session \"%s\" on behalf of trigger \"%p\" but it was already started",
199 session_name,
200 work_item->trigger);
201 break;
202 default:
203 WARN("Failed to start session \"%s\" on behalf of trigger \"%p\": %s",
204 session_name,
205 work_item->trigger,
206 lttng_strerror(-cmd_ret));
207 break;
208 }
209 session_put(session);
210 } else {
211 DBG("Failed to find session \"%s\" by name while executing \"%s\" action of trigger \"%p\"",
212 session_name, get_action_name(action),
213 work_item->trigger);
214 }
215 session_unlock_list();
216 end:
217 return ret;
218 }
219
220 static int action_executor_stop_session_handler(struct action_executor *executor,
221 const struct action_work_item *work_item,
222 const struct lttng_action *action)
223 {
224 int ret = 0;
225 const char *session_name;
226 enum lttng_action_status action_status;
227 struct ltt_session *session;
228
229 action_status = lttng_action_stop_session_get_session_name(
230 action, &session_name);
231 if (action_status != LTTNG_ACTION_STATUS_OK) {
232 ERR("Failed to get session name from \"%s\" action",
233 get_action_name(action));
234 ret = -1;
235 goto end;
236 }
237
238 session_lock_list();
239 session = session_find_by_name(session_name);
240 if (session) {
241 enum lttng_error_code cmd_ret;
242
243 session_lock(session);
244 cmd_ret = cmd_stop_trace(session);
245 session_unlock(session);
246
247 switch (cmd_ret) {
248 case LTTNG_OK:
249 DBG("Successfully stopped session \"%s\" on behalf of trigger \"%p\"",
250 session_name,
251 work_item->trigger);
252 break;
253 case LTTNG_ERR_TRACE_ALREADY_STOPPED:
254 DBG("Attempted to stop session \"%s\" on behalf of trigger \"%p\" but it was already stopped",
255 session_name,
256 work_item->trigger);
257 break;
258 default:
259 WARN("Failed to stop session \"%s\" on behalf of trigger \"%p\": %s",
260 session_name,
261 work_item->trigger,
262 lttng_strerror(-cmd_ret));
263 break;
264 }
265 session_put(session);
266 } else {
267 DBG("Failed to find session \"%s\" by name while executing \"%s\" action of trigger \"%p\"",
268 session_name, get_action_name(action),
269 work_item->trigger);
270 }
271 session_unlock_list();
272 end:
273 return ret;
274 }
275
276 static int action_executor_rotate_session_handler(struct action_executor *executor,
277 const struct action_work_item *work_item,
278 const struct lttng_action *action)
279 {
280 int ret = 0;
281 const char *session_name;
282 enum lttng_action_status action_status;
283 struct ltt_session *session;
284
285 action_status = lttng_action_rotate_session_get_session_name(
286 action, &session_name);
287 if (action_status != LTTNG_ACTION_STATUS_OK) {
288 ERR("Failed to get session name from \"%s\" action",
289 get_action_name(action));
290 ret = -1;
291 goto end;
292 }
293
294 session_lock_list();
295 session = session_find_by_name(session_name);
296 if (session) {
297 enum lttng_error_code cmd_ret;
298
299 session_lock(session);
300 cmd_ret = cmd_rotate_session(session, NULL, false,
301 LTTNG_TRACE_CHUNK_COMMAND_TYPE_MOVE_TO_COMPLETED);
302 session_unlock(session);
303
304 switch (cmd_ret) {
305 case LTTNG_OK:
306 DBG("Successfully started rotation of session \"%s\" on behalf of trigger \"%p\"",
307 session_name,
308 work_item->trigger);
309 break;
310 case LTTNG_ERR_ROTATION_PENDING:
311 DBG("Attempted to start a rotation of session \"%s\" on behalf of trigger \"%p\" but a rotation is already ongoing",
312 session_name,
313 work_item->trigger);
314 break;
315 case LTTNG_ERR_ROTATION_MULTIPLE_AFTER_STOP:
316 case LTTNG_ERR_ROTATION_AFTER_STOP_CLEAR:
317 DBG("Attempted to start a rotation of session \"%s\" on behalf of trigger \"%p\" but a rotation has already been completed since the last stop or clear",
318 session_name,
319 work_item->trigger);
320 break;
321 default:
322 WARN("Failed to start a rotation of session \"%s\" on behalf of trigger \"%p\": %s",
323 session_name,
324 work_item->trigger,
325 lttng_strerror(-cmd_ret));
326 break;
327 }
328 session_put(session);
329 } else {
330 DBG("Failed to find session \"%s\" by name while executing \"%s\" action of trigger \"%p\"",
331 session_name, get_action_name(action),
332 work_item->trigger);
333 }
334 session_unlock_list();
335 end:
336 return ret;
337 }
338
339 static int action_executor_snapshot_session_handler(struct action_executor *executor,
340 const struct action_work_item *work_item,
341 const struct lttng_action *action)
342 {
343 int ret = 0;
344 const char *session_name;
345 enum lttng_action_status action_status;
346 struct ltt_session *session;
347 const struct lttng_snapshot_output default_snapshot_output = {
348 .max_size = UINT64_MAX,
349 };
350 const struct lttng_snapshot_output *snapshot_output =
351 &default_snapshot_output;
352
353 action_status = lttng_action_snapshot_session_get_session_name(
354 action, &session_name);
355 if (action_status != LTTNG_ACTION_STATUS_OK) {
356 ERR("Failed to get session name from \"%s\" action",
357 get_action_name(action));
358 ret = -1;
359 goto end;
360 }
361
362 action_status = lttng_action_snapshot_session_get_output(
363 action, &snapshot_output);
364 if (action_status != LTTNG_ACTION_STATUS_OK &&
365 action_status != LTTNG_ACTION_STATUS_UNSET) {
366 ERR("Failed to get output from \"%s\" action",
367 get_action_name(action));
368 ret = -1;
369 goto end;
370 }
371
372 session_lock_list();
373 session = session_find_by_name(session_name);
374 if (session) {
375 enum lttng_error_code cmd_ret;
376
377 session_lock(session);
378 cmd_ret = cmd_snapshot_record(session, snapshot_output, 0);
379 session_unlock(session);
380
381 switch (cmd_ret) {
382 case LTTNG_OK:
383 DBG("Successfully recorded snapshot of session \"%s\" on behalf of trigger \"%p\"",
384 session_name,
385 work_item->trigger);
386 break;
387 default:
388 WARN("Failed to record snapshot of session \"%s\" on behalf of trigger \"%p\": %s",
389 session_name,
390 work_item->trigger,
391 lttng_strerror(-cmd_ret));
392 break;
393 }
394 session_put(session);
395 } else {
396 DBG("Failed to find session \"%s\" by name while executing \"%s\" action of trigger \"%p\"",
397 session_name, get_action_name(action),
398 work_item->trigger);
399 }
400 session_unlock_list();
401 end:
402 return ret;
403 }
404
405 static int action_executor_group_handler(struct action_executor *executor,
406 const struct action_work_item *work_item,
407 const struct lttng_action *action_group)
408 {
409 int ret = 0;
410 unsigned int i, count;
411 enum lttng_action_status action_status;
412
413 action_status = lttng_action_group_get_count(action_group, &count);
414 if (action_status != LTTNG_ACTION_STATUS_OK) {
415 /* Fatal error. */
416 ERR("Failed to get count of action in action group");
417 ret = -1;
418 goto end;
419 }
420
421 DBG("Action group has %u action%s", count, count != 1 ? "s" : "");
422 for (i = 0; i < count; i++) {
423 const struct lttng_action *action =
424 lttng_action_group_get_at_index(
425 action_group, i);
426
427 ret = action_executor_generic_handler(
428 executor, work_item, action);
429 if (ret) {
430 ERR("Stopping the execution of the action group of trigger \"%p\" following a fatal error",
431 work_item->trigger);
432 goto end;
433 }
434 }
435 end:
436 return ret;
437 }
438
439 static int action_executor_generic_handler(struct action_executor *executor,
440 const struct action_work_item *work_item,
441 const struct lttng_action *action)
442 {
443 DBG("Executing action \"%s\" of trigger \"%p\" action work item %" PRIu64,
444 get_action_name(action),
445 work_item->trigger,
446 work_item->id);
447
448 return action_executors[lttng_action_get_type_const(action)](
449 executor, work_item, action);
450 }
451
452 static int action_work_item_execute(struct action_executor *executor,
453 struct action_work_item *work_item)
454 {
455 int ret;
456 const struct lttng_action *action =
457 lttng_trigger_get_const_action(work_item->trigger);
458
459 DBG("Starting execution of action work item %" PRIu64 " of trigger \"%p\"",
460 work_item->id, work_item->trigger);
461 ret = action_executor_generic_handler(executor, work_item, action);
462 DBG("Completed execution of action work item %" PRIu64 " of trigger \"%p\"",
463 work_item->id, work_item->trigger);
464 return ret;
465 }
466
467 static void action_work_item_destroy(struct action_work_item *work_item)
468 {
469 lttng_trigger_put(work_item->trigger);
470 lttng_evaluation_destroy(work_item->evaluation);
471 notification_client_list_put(work_item->client_list);
472 free(work_item);
473 }
474
475 static void *action_executor_thread(void *_data)
476 {
477 struct action_executor *executor = _data;
478
479 assert(executor);
480
481 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_ACTION_EXECUTOR);
482
483 rcu_register_thread();
484 rcu_thread_online();
485
486 DBG("Entering work execution loop");
487 pthread_mutex_lock(&executor->work.lock);
488 while (!executor->should_quit) {
489 int ret;
490 struct action_work_item *work_item;
491
492 health_code_update();
493 if (executor->work.pending_count == 0) {
494 health_poll_entry();
495 DBG("No work items enqueued, entering wait");
496 pthread_cond_wait(&executor->work.cond,
497 &executor->work.lock);
498 DBG("Woke-up from wait");
499 health_poll_exit();
500 continue;
501 }
502
503 /* Pop item from front of the listwith work lock held. */
504 work_item = cds_list_first_entry(&executor->work.list,
505 struct action_work_item, list_node);
506 cds_list_del(&work_item->list_node);
507 executor->work.pending_count--;
508
509 /*
510 * Work can be performed without holding the work lock,
511 * allowing new items to be queued.
512 */
513 pthread_mutex_unlock(&executor->work.lock);
514 ret = action_work_item_execute(executor, work_item);
515 action_work_item_destroy(work_item);
516 if (ret) {
517 /* Fatal error. */
518 break;
519 }
520
521 health_code_update();
522 pthread_mutex_lock(&executor->work.lock);
523 }
524
525 pthread_mutex_unlock(&executor->work.lock);
526 DBG("Left work execution loop");
527
528 health_code_update();
529
530 rcu_thread_offline();
531 rcu_unregister_thread();
532 health_unregister(health_sessiond);
533
534 return NULL;
535 }
536
537 static bool shutdown_action_executor_thread(void *_data)
538 {
539 struct action_executor *executor = _data;
540
541 executor->should_quit = true;
542 pthread_cond_signal(&executor->work.cond);
543 return true;
544 }
545
546 static void clean_up_action_executor_thread(void *_data)
547 {
548 struct action_executor *executor = _data;
549
550 assert(cds_list_empty(&executor->work.list));
551
552 pthread_mutex_destroy(&executor->work.lock);
553 pthread_cond_destroy(&executor->work.cond);
554 free(executor);
555 }
556
557 struct action_executor *action_executor_create(
558 struct notification_thread_handle *handle)
559 {
560 struct action_executor *executor = zmalloc(sizeof(*executor));
561
562 if (!executor) {
563 goto end;
564 }
565
566 CDS_INIT_LIST_HEAD(&executor->work.list);
567 pthread_cond_init(&executor->work.cond, NULL);
568 pthread_mutex_init(&executor->work.lock, NULL);
569 executor->notification_thread_handle = handle;
570
571 executor->thread = lttng_thread_create(THREAD_NAME,
572 action_executor_thread, shutdown_action_executor_thread,
573 clean_up_action_executor_thread, executor);
574 end:
575 return executor;
576 }
577
578 void action_executor_destroy(struct action_executor *executor)
579 {
580 struct action_work_item *work_item, *tmp;
581
582 /* TODO Wait for work list to drain? */
583 lttng_thread_shutdown(executor->thread);
584 pthread_mutex_lock(&executor->work.lock);
585 if (executor->work.pending_count != 0) {
586 WARN("%" PRIu64
587 " trigger action%s still queued for execution and will be discarded",
588 executor->work.pending_count,
589 executor->work.pending_count == 1 ? " is" :
590 "s are");
591 }
592
593 cds_list_for_each_entry_safe (
594 work_item, tmp, &executor->work.list, list_node) {
595 WARN("Discarding action work item %" PRIu64
596 " associated to trigger \"%p\"",
597 work_item->id, work_item->trigger);
598 cds_list_del(&work_item->list_node);
599 action_work_item_destroy(work_item);
600 }
601 pthread_mutex_unlock(&executor->work.lock);
602 lttng_thread_put(executor->thread);
603 }
604
605 /* RCU read-lock must be held by the caller. */
606 enum action_executor_status action_executor_enqueue(
607 struct action_executor *executor,
608 struct lttng_trigger *trigger,
609 struct lttng_evaluation *evaluation,
610 const struct lttng_credentials *object_creds,
611 struct notification_client_list *client_list)
612 {
613 enum action_executor_status executor_status = ACTION_EXECUTOR_STATUS_OK;
614 const uint64_t work_item_id = executor->next_work_item_id++;
615 struct action_work_item *work_item;
616 bool signal = false;
617
618 pthread_mutex_lock(&executor->work.lock);
619 /* Check for queue overflow. */
620 if (executor->work.pending_count >= MAX_QUEUED_WORK_COUNT) {
621 /* Most likely spammy, remove if it is the case. */
622 DBG("Refusing to enqueue action for trigger \"%p\" as work item %" PRIu64
623 " (overflow)",
624 trigger, work_item_id);
625 executor_status = ACTION_EXECUTOR_STATUS_OVERFLOW;
626 goto error_unlock;
627 }
628
629 work_item = zmalloc(sizeof(*work_item));
630 if (!work_item) {
631 PERROR("Failed to allocate action executor work item on behalf of trigger \"%p\"",
632 trigger);
633 executor_status = ACTION_EXECUTOR_STATUS_ERROR;
634 goto error_unlock;
635 }
636
637 lttng_trigger_get(trigger);
638 if (client_list) {
639 const bool reference_acquired =
640 notification_client_list_get(client_list);
641
642 assert(reference_acquired);
643 }
644
645 *work_item = (typeof(*work_item)){
646 .id = work_item_id,
647 .trigger = trigger,
648 /* Ownership transferred to the work item. */
649 .evaluation = evaluation,
650 .object_creds = {
651 .is_set = !!object_creds,
652 .value = object_creds ? *object_creds :
653 (typeof(work_item->object_creds.value)) {},
654 },
655 .client_list = client_list,
656 .list_node = CDS_LIST_HEAD_INIT(work_item->list_node),
657 };
658
659 evaluation = NULL;
660 cds_list_add_tail(&work_item->list_node, &executor->work.list);
661 executor->work.pending_count++;
662 DBG("Enqueued action for trigger \"%p\" as work item %" PRIu64,
663 trigger, work_item_id);
664 signal = true;
665
666 error_unlock:
667 pthread_mutex_unlock(&executor->work.lock);
668 if (signal) {
669 pthread_cond_signal(&executor->work.cond);
670 }
671
672 lttng_evaluation_destroy(evaluation);
673 return executor_status;
674 }
This page took 0.067544 seconds and 3 git commands to generate.