ca969074382174efb466c6cc278d31676d5bceda
[lttng-tools.git] / src / bin / lttng-sessiond / action-executor.c
1 /*
2 * Copyright (C) 2020 Jérémie Galarneau <jeremie.galarneau@efficios.com>
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 *
6 */
7
8 #include "action-executor.h"
9 #include "cmd.h"
10 #include "health-sessiond.h"
11 #include "lttng-sessiond.h"
12 #include "notification-thread-internal.h"
13 #include "session.h"
14 #include "thread.h"
15 #include <common/macros.h>
16 #include <common/optional.h>
17 #include <lttng/action/action-internal.h>
18 #include <lttng/action/group.h>
19 #include <lttng/action/notify.h>
20 #include <lttng/action/rotate-session.h>
21 #include <lttng/action/snapshot-session.h>
22 #include <lttng/action/start-session.h>
23 #include <lttng/action/stop-session.h>
24 #include <lttng/condition/evaluation.h>
25 #include <lttng/lttng-error.h>
26 #include <lttng/trigger/trigger-internal.h>
27 #include <pthread.h>
28 #include <stdbool.h>
29 #include <stddef.h>
30 #include <urcu/list.h>
31
32 #define THREAD_NAME "Action Executor"
33 #define MAX_QUEUED_WORK_COUNT 8192
34
/*
 * A unit of work queued for the action executor thread: the trigger whose
 * action must be executed along with the context captured at enqueue time.
 */
struct action_work_item {
	/* Identifier assigned at enqueue time; only used in log messages. */
	uint64_t id;
	/* A reference is acquired at enqueue time and released on destruction. */
	struct lttng_trigger *trigger;
	/* Owned by the work item; destroyed along with it. */
	struct lttng_evaluation *evaluation;
	/* May be NULL; when set, a reference is acquired at enqueue time. */
	struct notification_client_list *client_list;
	/* Only set when credentials were provided at enqueue time. */
	LTTNG_OPTIONAL(struct lttng_credentials) object_creds;
	/* Links the item into the executor's work list. */
	struct cds_list_head list_node;
};
43
/* State of the action executor and of its worker thread. */
struct action_executor {
	/* Worker thread running action_executor_thread(). */
	struct lttng_thread *thread;
	/* Used to report client communication updates back to the notification thread. */
	struct notification_thread_handle *notification_thread_handle;
	struct {
		/* Number of work items currently queued in `list`. */
		uint64_t pending_count;
		/* Queue of action_work_item; appended at the tail, popped from the front. */
		struct cds_list_head list;
		/* Signalled when an item is queued or when shutdown is requested. */
		pthread_cond_t cond;
		/* Held while `list` and `pending_count` are accessed. */
		pthread_mutex_t lock;
	} work;
	/* Set by the shutdown callback to make the worker thread leave its loop. */
	bool should_quit;
	/* Identifier handed to the next enqueued work item. */
	uint64_t next_work_item_id;
};
56
/*
 * Signature shared by all action handlers: each one receives the executor,
 * the work item being processed and the action to execute.
 *
 * Only return non-zero on a fatal error that should shut down the action
 * executor.
 */
typedef int (*action_executor_handler)(struct action_executor *executor,
		const struct action_work_item *,
		const struct lttng_action *action);
64
/*
 * Forward declarations of the per-action-type handlers. All of them follow
 * the action_executor_handler signature so that they can be stored in the
 * dispatch table below.
 */
static int action_executor_notify_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_start_session_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_stop_session_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_rotate_session_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_snapshot_session_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
static int action_executor_group_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action_group);
static int action_executor_generic_handler(
		struct action_executor *executor,
		const struct action_work_item *work_item,
		const struct lttng_action *action);
86
/*
 * Dispatch table mapping an action type to the handler that executes it.
 * It is indexed with the value returned by lttng_action_get_type(); entries
 * for types that are not listed here are implicitly NULL.
 */
static const action_executor_handler action_executors[] = {
	[LTTNG_ACTION_TYPE_NOTIFY] = action_executor_notify_handler,
	[LTTNG_ACTION_TYPE_START_SESSION] = action_executor_start_session_handler,
	[LTTNG_ACTION_TYPE_STOP_SESSION] = action_executor_stop_session_handler,
	[LTTNG_ACTION_TYPE_ROTATE_SESSION] = action_executor_rotate_session_handler,
	[LTTNG_ACTION_TYPE_SNAPSHOT_SESSION] = action_executor_snapshot_session_handler,
	[LTTNG_ACTION_TYPE_GROUP] = action_executor_group_handler,
};
95
96 static const char *action_type_names[] = {
97 [LTTNG_ACTION_TYPE_NOTIFY] = "Notify",
98 [LTTNG_ACTION_TYPE_START_SESSION] = "Start session",
99 [LTTNG_ACTION_TYPE_STOP_SESSION] = "Stop session",
100 [LTTNG_ACTION_TYPE_ROTATE_SESSION] = "Rotate session",
101 [LTTNG_ACTION_TYPE_SNAPSHOT_SESSION] = "Snapshot session",
102 [LTTNG_ACTION_TYPE_GROUP] = "Group",
103 };
104
105 static const char *get_action_name(const struct lttng_action *action)
106 {
107 return action_type_names[lttng_action_get_type(action)];
108 }
109
110 /* Check if this trigger allowed to interect with a given session. */
111 static bool is_trigger_allowed_for_session(const struct lttng_trigger *trigger,
112 struct ltt_session *session)
113 {
114 bool is_allowed = false;
115 const struct lttng_credentials session_creds = {
116 .uid = LTTNG_OPTIONAL_INIT_VALUE(session->uid),
117 .gid = LTTNG_OPTIONAL_INIT_VALUE(session->gid),
118 };
119 /* Can never be NULL. */
120 const struct lttng_credentials *trigger_creds =
121 lttng_trigger_get_credentials(trigger);
122
123 is_allowed = (lttng_credentials_is_equal_uid(trigger_creds, &session_creds)) ||
124 (lttng_credentials_get_uid(trigger_creds) == 0);
125 if (!is_allowed) {
126 WARN("Trigger is not allowed to interact with session `%s`: session uid = %ld, session gid = %ld, trigger uid = %ld",
127 session->name,
128 (long int) session->uid,
129 (long int) session->gid,
130 (long int) lttng_credentials_get_uid(trigger_creds));
131 }
132
133 return is_allowed;
134 }
135
136 static int client_handle_transmission_status(
137 struct notification_client *client,
138 enum client_transmission_status status,
139 void *user_data)
140 {
141 int ret = 0;
142 struct action_executor *executor = user_data;
143 bool update_communication = true;
144
145 switch (status) {
146 case CLIENT_TRANSMISSION_STATUS_COMPLETE:
147 DBG("Successfully sent full notification to client, client_id = %" PRIu64,
148 client->id);
149 update_communication = false;
150 break;
151 case CLIENT_TRANSMISSION_STATUS_QUEUED:
152 DBG("Queued notification in client outgoing buffer, client_id = %" PRIu64,
153 client->id);
154 break;
155 case CLIENT_TRANSMISSION_STATUS_FAIL:
156 DBG("Communication error occurred while sending notification to client, client_id = %" PRIu64,
157 client->id);
158 break;
159 default:
160 ERR("Fatal error encoutered while sending notification to client, client_id = %" PRIu64,
161 client->id);
162 ret = -1;
163 goto end;
164 }
165
166 if (!update_communication) {
167 goto end;
168 }
169
170 /* Safe to read client's id without locking as it is immutable. */
171 ret = notification_thread_client_communication_update(
172 executor->notification_thread_handle, client->id,
173 status);
174 end:
175 return ret;
176 }
177
178 static int action_executor_notify_handler(struct action_executor *executor,
179 const struct action_work_item *work_item,
180 const struct lttng_action *action)
181 {
182 return notification_client_list_send_evaluation(work_item->client_list,
183 lttng_trigger_get_const_condition(work_item->trigger),
184 work_item->evaluation,
185 lttng_trigger_get_credentials(work_item->trigger),
186 LTTNG_OPTIONAL_GET_PTR(work_item->object_creds),
187 client_handle_transmission_status,
188 executor);
189 }
190
191 static int action_executor_start_session_handler(struct action_executor *executor,
192 const struct action_work_item *work_item,
193 const struct lttng_action *action)
194 {
195 int ret = 0;
196 const char *session_name;
197 enum lttng_action_status action_status;
198 struct ltt_session *session;
199 enum lttng_error_code cmd_ret;
200
201 action_status = lttng_action_start_session_get_session_name(
202 action, &session_name);
203 if (action_status != LTTNG_ACTION_STATUS_OK) {
204 ERR("Failed to get session name from `%s` action",
205 get_action_name(action));
206 ret = -1;
207 goto end;
208 }
209
210 session_lock_list();
211 session = session_find_by_name(session_name);
212 if (!session) {
213 DBG("Failed to find session `%s` by name while executing `%s` action of trigger `%p`",
214 session_name, get_action_name(action),
215 work_item->trigger);
216 goto error_unlock_list;
217 }
218
219 session_lock(session);
220 if (!is_trigger_allowed_for_session(work_item->trigger, session)) {
221 goto error_dispose_session;
222 }
223
224 cmd_ret = cmd_start_trace(session);
225 switch (cmd_ret) {
226 case LTTNG_OK:
227 DBG("Successfully started session `%s` on behalf of trigger `%p`",
228 session_name, work_item->trigger);
229 break;
230 case LTTNG_ERR_TRACE_ALREADY_STARTED:
231 DBG("Attempted to start session `%s` on behalf of trigger `%p` but it was already started",
232 session_name, work_item->trigger);
233 break;
234 default:
235 WARN("Failed to start session `%s` on behalf of trigger `%p`: %s",
236 session_name, work_item->trigger,
237 lttng_strerror(-cmd_ret));
238 break;
239 }
240
241 error_dispose_session:
242 session_unlock(session);
243 session_put(session);
244 error_unlock_list:
245 session_unlock_list();
246 end:
247 return ret;
248 }
249
250 static int action_executor_stop_session_handler(struct action_executor *executor,
251 const struct action_work_item *work_item,
252 const struct lttng_action *action)
253 {
254 int ret = 0;
255 const char *session_name;
256 enum lttng_action_status action_status;
257 struct ltt_session *session;
258 enum lttng_error_code cmd_ret;
259
260 action_status = lttng_action_stop_session_get_session_name(
261 action, &session_name);
262 if (action_status != LTTNG_ACTION_STATUS_OK) {
263 ERR("Failed to get session name from `%s` action",
264 get_action_name(action));
265 ret = -1;
266 goto end;
267 }
268
269 session_lock_list();
270 session = session_find_by_name(session_name);
271 if (!session) {
272 DBG("Failed to find session `%s` by name while executing `%s` action of trigger `%p`",
273 session_name, get_action_name(action),
274 work_item->trigger);
275 goto error_unlock_list;
276 }
277
278 session_lock(session);
279 if (!is_trigger_allowed_for_session(work_item->trigger, session)) {
280 goto error_dispose_session;
281 }
282
283 cmd_ret = cmd_stop_trace(session);
284 switch (cmd_ret) {
285 case LTTNG_OK:
286 DBG("Successfully stopped session `%s` on behalf of trigger `%p`",
287 session_name, work_item->trigger);
288 break;
289 case LTTNG_ERR_TRACE_ALREADY_STOPPED:
290 DBG("Attempted to stop session `%s` on behalf of trigger `%p` but it was already stopped",
291 session_name, work_item->trigger);
292 break;
293 default:
294 WARN("Failed to stop session `%s` on behalf of trigger `%p`: %s",
295 session_name, work_item->trigger,
296 lttng_strerror(-cmd_ret));
297 break;
298 }
299
300 error_dispose_session:
301 session_unlock(session);
302 session_put(session);
303 error_unlock_list:
304 session_unlock_list();
305 end:
306 return ret;
307 }
308
309 static int action_executor_rotate_session_handler(struct action_executor *executor,
310 const struct action_work_item *work_item,
311 const struct lttng_action *action)
312 {
313 int ret = 0;
314 const char *session_name;
315 enum lttng_action_status action_status;
316 struct ltt_session *session;
317 enum lttng_error_code cmd_ret;
318
319 action_status = lttng_action_rotate_session_get_session_name(
320 action, &session_name);
321 if (action_status != LTTNG_ACTION_STATUS_OK) {
322 ERR("Failed to get session name from `%s` action",
323 get_action_name(action));
324 ret = -1;
325 goto end;
326 }
327
328 session_lock_list();
329 session = session_find_by_name(session_name);
330 if (!session) {
331 DBG("Failed to find session `%s` by name while executing `%s` action of trigger `%p`",
332 session_name, get_action_name(action),
333 work_item->trigger);
334 goto error_unlock_list;
335 }
336
337 session_lock(session);
338 if (!is_trigger_allowed_for_session(work_item->trigger, session)) {
339 goto error_dispose_session;
340 }
341
342 cmd_ret = cmd_rotate_session(session, NULL, false,
343 LTTNG_TRACE_CHUNK_COMMAND_TYPE_MOVE_TO_COMPLETED);
344 switch (cmd_ret) {
345 case LTTNG_OK:
346 DBG("Successfully started rotation of session `%s` on behalf of trigger `%p`",
347 session_name, work_item->trigger);
348 break;
349 case LTTNG_ERR_ROTATION_PENDING:
350 DBG("Attempted to start a rotation of session `%s` on behalf of trigger `%p` but a rotation is already ongoing",
351 session_name, work_item->trigger);
352 break;
353 case LTTNG_ERR_ROTATION_MULTIPLE_AFTER_STOP:
354 case LTTNG_ERR_ROTATION_AFTER_STOP_CLEAR:
355 DBG("Attempted to start a rotation of session `%s` on behalf of trigger `%p` but a rotation has already been completed since the last stop or clear",
356 session_name, work_item->trigger);
357 break;
358 default:
359 WARN("Failed to start a rotation of session `%s` on behalf of trigger `%p`: %s",
360 session_name, work_item->trigger,
361 lttng_strerror(-cmd_ret));
362 break;
363 }
364
365 error_dispose_session:
366 session_unlock(session);
367 session_put(session);
368 error_unlock_list:
369 session_unlock_list();
370 end:
371 return ret;
372 }
373
374 static int action_executor_snapshot_session_handler(struct action_executor *executor,
375 const struct action_work_item *work_item,
376 const struct lttng_action *action)
377 {
378 int ret = 0;
379 const char *session_name;
380 enum lttng_action_status action_status;
381 struct ltt_session *session;
382 const struct lttng_snapshot_output default_snapshot_output = {
383 .max_size = UINT64_MAX,
384 };
385 const struct lttng_snapshot_output *snapshot_output =
386 &default_snapshot_output;
387 enum lttng_error_code cmd_ret;
388
389 action_status = lttng_action_snapshot_session_get_session_name(
390 action, &session_name);
391 if (action_status != LTTNG_ACTION_STATUS_OK) {
392 ERR("Failed to get session name from `%s` action",
393 get_action_name(action));
394 ret = -1;
395 goto end;
396 }
397
398 action_status = lttng_action_snapshot_session_get_output(
399 action, &snapshot_output);
400 if (action_status != LTTNG_ACTION_STATUS_OK &&
401 action_status != LTTNG_ACTION_STATUS_UNSET) {
402 ERR("Failed to get output from `%s` action",
403 get_action_name(action));
404 ret = -1;
405 goto end;
406 }
407
408 session_lock_list();
409 session = session_find_by_name(session_name);
410 if (!session) {
411 DBG("Failed to find session `%s` by name while executing `%s` action of trigger `%p`",
412 session_name, get_action_name(action),
413 work_item->trigger);
414 goto error_unlock_list;
415 }
416
417
418 session_lock(session);
419 if (!is_trigger_allowed_for_session(work_item->trigger, session)) {
420 goto error_dispose_session;
421 }
422
423 cmd_ret = cmd_snapshot_record(session, snapshot_output, 0);
424 switch (cmd_ret) {
425 case LTTNG_OK:
426 DBG("Successfully recorded snapshot of session `%s` on behalf of trigger `%p`",
427 session_name, work_item->trigger);
428 break;
429 default:
430 WARN("Failed to record snapshot of session `%s` on behalf of trigger `%p`: %s",
431 session_name, work_item->trigger,
432 lttng_strerror(-cmd_ret));
433 break;
434 }
435
436 error_dispose_session:
437 session_unlock(session);
438 session_put(session);
439 error_unlock_list:
440 session_unlock_list();
441 end:
442 return ret;
443 }
444
445 static int action_executor_group_handler(struct action_executor *executor,
446 const struct action_work_item *work_item,
447 const struct lttng_action *action_group)
448 {
449 int ret = 0;
450 unsigned int i, count;
451 enum lttng_action_status action_status;
452
453 action_status = lttng_action_group_get_count(action_group, &count);
454 if (action_status != LTTNG_ACTION_STATUS_OK) {
455 /* Fatal error. */
456 ERR("Failed to get count of action in action group");
457 ret = -1;
458 goto end;
459 }
460
461 DBG("Action group has %u action%s", count, count != 1 ? "s" : "");
462 for (i = 0; i < count; i++) {
463 const struct lttng_action *action =
464 lttng_action_group_get_at_index(
465 action_group, i);
466
467 ret = action_executor_generic_handler(
468 executor, work_item, action);
469 if (ret) {
470 ERR("Stopping the execution of the action group of trigger `%p` following a fatal error",
471 work_item->trigger);
472 goto end;
473 }
474 }
475 end:
476 return ret;
477 }
478
479 static int action_executor_generic_handler(struct action_executor *executor,
480 const struct action_work_item *work_item,
481 const struct lttng_action *action)
482 {
483 DBG("Executing action `%s` of trigger `%p` action work item %" PRIu64,
484 get_action_name(action),
485 work_item->trigger,
486 work_item->id);
487
488 return action_executors[lttng_action_get_type(action)](
489 executor, work_item, action);
490 }
491
492 static int action_work_item_execute(struct action_executor *executor,
493 struct action_work_item *work_item)
494 {
495 int ret;
496 const struct lttng_action *action =
497 lttng_trigger_get_const_action(work_item->trigger);
498
499 DBG("Starting execution of action work item %" PRIu64 " of trigger `%p`",
500 work_item->id, work_item->trigger);
501 ret = action_executor_generic_handler(executor, work_item, action);
502 DBG("Completed execution of action work item %" PRIu64 " of trigger `%p`",
503 work_item->id, work_item->trigger);
504 return ret;
505 }
506
/*
 * Release everything held by a work item, then the work item itself: the
 * trigger reference, the evaluation and the client list reference.
 */
static void action_work_item_destroy(struct action_work_item *work_item)
{
	lttng_trigger_put(work_item->trigger);
	lttng_evaluation_destroy(work_item->evaluation);
	/*
	 * NOTE(review): a work item can be enqueued with a NULL client list;
	 * presumably notification_client_list_put() tolerates NULL -- confirm.
	 */
	notification_client_list_put(work_item->client_list);
	free(work_item);
}
514
515 static void *action_executor_thread(void *_data)
516 {
517 struct action_executor *executor = _data;
518
519 assert(executor);
520
521 health_register(health_sessiond, HEALTH_SESSIOND_TYPE_ACTION_EXECUTOR);
522
523 rcu_register_thread();
524 rcu_thread_online();
525
526 DBG("Entering work execution loop");
527 pthread_mutex_lock(&executor->work.lock);
528 while (!executor->should_quit) {
529 int ret;
530 struct action_work_item *work_item;
531
532 health_code_update();
533 if (executor->work.pending_count == 0) {
534 health_poll_entry();
535 DBG("No work items enqueued, entering wait");
536 pthread_cond_wait(&executor->work.cond,
537 &executor->work.lock);
538 DBG("Woke-up from wait");
539 health_poll_exit();
540 continue;
541 }
542
543 /* Pop item from front of the listwith work lock held. */
544 work_item = cds_list_first_entry(&executor->work.list,
545 struct action_work_item, list_node);
546 cds_list_del(&work_item->list_node);
547 executor->work.pending_count--;
548
549 /*
550 * Work can be performed without holding the work lock,
551 * allowing new items to be queued.
552 */
553 pthread_mutex_unlock(&executor->work.lock);
554 ret = action_work_item_execute(executor, work_item);
555 action_work_item_destroy(work_item);
556 if (ret) {
557 /* Fatal error. */
558 break;
559 }
560
561 health_code_update();
562 pthread_mutex_lock(&executor->work.lock);
563 }
564
565 if (executor->should_quit) {
566 pthread_mutex_unlock(&executor->work.lock);
567 }
568 DBG("Left work execution loop");
569
570 health_code_update();
571
572 rcu_thread_offline();
573 rcu_unregister_thread();
574 health_unregister(health_sessiond);
575
576 return NULL;
577 }
578
579 static bool shutdown_action_executor_thread(void *_data)
580 {
581 struct action_executor *executor = _data;
582
583 executor->should_quit = true;
584 pthread_cond_signal(&executor->work.cond);
585 return true;
586 }
587
588 static void clean_up_action_executor_thread(void *_data)
589 {
590 struct action_executor *executor = _data;
591
592 assert(cds_list_empty(&executor->work.list));
593
594 pthread_mutex_destroy(&executor->work.lock);
595 pthread_cond_destroy(&executor->work.cond);
596 free(executor);
597 }
598
599 struct action_executor *action_executor_create(
600 struct notification_thread_handle *handle)
601 {
602 struct action_executor *executor = zmalloc(sizeof(*executor));
603
604 if (!executor) {
605 goto end;
606 }
607
608 CDS_INIT_LIST_HEAD(&executor->work.list);
609 pthread_cond_init(&executor->work.cond, NULL);
610 pthread_mutex_init(&executor->work.lock, NULL);
611 executor->notification_thread_handle = handle;
612
613 executor->thread = lttng_thread_create(THREAD_NAME,
614 action_executor_thread, shutdown_action_executor_thread,
615 clean_up_action_executor_thread, executor);
616 end:
617 return executor;
618 }
619
620 void action_executor_destroy(struct action_executor *executor)
621 {
622 struct action_work_item *work_item, *tmp;
623
624 /* TODO Wait for work list to drain? */
625 lttng_thread_shutdown(executor->thread);
626 pthread_mutex_lock(&executor->work.lock);
627 if (executor->work.pending_count != 0) {
628 WARN("%" PRIu64
629 " trigger action%s still queued for execution and will be discarded",
630 executor->work.pending_count,
631 executor->work.pending_count == 1 ? " is" :
632 "s are");
633 }
634
635 cds_list_for_each_entry_safe (
636 work_item, tmp, &executor->work.list, list_node) {
637 WARN("Discarding action work item %" PRIu64
638 " associated to trigger `%p`",
639 work_item->id, work_item->trigger);
640 cds_list_del(&work_item->list_node);
641 action_work_item_destroy(work_item);
642 }
643 pthread_mutex_unlock(&executor->work.lock);
644 lttng_thread_put(executor->thread);
645 }
646
647 /* RCU read-lock must be held by the caller. */
648 enum action_executor_status action_executor_enqueue(
649 struct action_executor *executor,
650 struct lttng_trigger *trigger,
651 struct lttng_evaluation *evaluation,
652 const struct lttng_credentials *object_creds,
653 struct notification_client_list *client_list)
654 {
655 enum action_executor_status executor_status = ACTION_EXECUTOR_STATUS_OK;
656 const uint64_t work_item_id = executor->next_work_item_id++;
657 struct action_work_item *work_item;
658 bool signal = false;
659
660 pthread_mutex_lock(&executor->work.lock);
661 /* Check for queue overflow. */
662 if (executor->work.pending_count >= MAX_QUEUED_WORK_COUNT) {
663 /* Most likely spammy, remove if it is the case. */
664 DBG("Refusing to enqueue action for trigger `%p` as work item %" PRIu64
665 " (overflow)",
666 trigger, work_item_id);
667 executor_status = ACTION_EXECUTOR_STATUS_OVERFLOW;
668 goto error_unlock;
669 }
670
671 work_item = zmalloc(sizeof(*work_item));
672 if (!work_item) {
673 PERROR("Failed to allocate action executor work item on behalf of trigger `%p`",
674 trigger);
675 executor_status = ACTION_EXECUTOR_STATUS_ERROR;
676 goto error_unlock;
677 }
678
679 lttng_trigger_get(trigger);
680 if (client_list) {
681 const bool reference_acquired =
682 notification_client_list_get(client_list);
683
684 assert(reference_acquired);
685 }
686
687 *work_item = (typeof(*work_item)){
688 .id = work_item_id,
689 .trigger = trigger,
690 /* Ownership transferred to the work item. */
691 .evaluation = evaluation,
692 .object_creds = {
693 .is_set = !!object_creds,
694 .value = object_creds ? *object_creds :
695 (typeof(work_item->object_creds.value)) {},
696 },
697 .client_list = client_list,
698 .list_node = CDS_LIST_HEAD_INIT(work_item->list_node),
699 };
700
701 evaluation = NULL;
702 cds_list_add_tail(&work_item->list_node, &executor->work.list);
703 executor->work.pending_count++;
704 DBG("Enqueued action for trigger `%p` as work item %" PRIu64,
705 trigger, work_item_id);
706 signal = true;
707
708 error_unlock:
709 pthread_mutex_unlock(&executor->work.lock);
710 if (signal) {
711 pthread_cond_signal(&executor->work.cond);
712 }
713
714 lttng_evaluation_destroy(evaluation);
715 return executor_status;
716 }
This page took 0.067136 seconds and 3 git commands to generate.