86dc2ff7bba5ca619a888d3ba39e05ffcbf5c6ab
[lttng-tools.git] / src / bin / lttng-relayd / live.c
1 /*
2 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
3 * David Goulet <dgoulet@efficios.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19 #define _GNU_SOURCE
20 #include <getopt.h>
21 #include <grp.h>
22 #include <limits.h>
23 #include <pthread.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/mman.h>
29 #include <sys/mount.h>
30 #include <sys/resource.h>
31 #include <sys/socket.h>
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <inttypes.h>
36 #include <urcu/futex.h>
37 #include <urcu/uatomic.h>
38 #include <unistd.h>
39 #include <fcntl.h>
40 #include <config.h>
41
42 #include <lttng/lttng.h>
43 #include <common/common.h>
44 #include <common/compat/poll.h>
45 #include <common/compat/socket.h>
46 #include <common/defaults.h>
47 #include <common/futex.h>
48 #include <common/sessiond-comm/sessiond-comm.h>
49 #include <common/sessiond-comm/inet.h>
50 #include <common/sessiond-comm/relayd.h>
51 #include <common/uri.h>
52 #include <common/utils.h>
53
54 #include "cmd.h"
55 #include "live.h"
56 #include "lttng-relayd.h"
57 #include "lttng-viewer.h"
58 #include "utils.h"
59 #include "health-relayd.h"
60
61 static struct lttng_uri *live_uri;
62
63 /*
64 * Quit pipe for all threads. This permits a single cancellation point
65 * for all threads when receiving an event on the pipe.
66 */
67 static int live_thread_quit_pipe[2] = { -1, -1 };
68
69 /*
70 * This pipe is used to inform the worker thread that a command is queued and
71 * ready to be processed.
72 */
73 static int live_relay_cmd_pipe[2] = { -1, -1 };
74
75 /* Shared between threads */
76 static int live_dispatch_thread_exit;
77
78 static pthread_t live_listener_thread;
79 static pthread_t live_dispatcher_thread;
80 static pthread_t live_worker_thread;
81
82 /*
83 * Relay command queue.
84 *
85 * The live_thread_listener and live_thread_dispatcher communicate with this
86 * queue.
87 */
88 static struct relay_cmd_queue viewer_cmd_queue;
89
90 static uint64_t last_relay_viewer_session_id;
91
92 /*
93 * Cleanup the daemon
94 */
95 static
96 void cleanup(void)
97 {
98 DBG("Cleaning up");
99
100 free(live_uri);
101 }
102
/*
 * Wake up a thread by writing a single byte to the write end of its pipe.
 *
 * The write is retried when interrupted by a signal. Returns the result of
 * the last write() call: 1 on success, anything else is reported as an error.
 */
static int notify_thread_pipe(int wpipe)
{
	int written;

	for (;;) {
		written = write(wpipe, "!", 1);
		if (written >= 0 || errno != EINTR) {
			break;
		}
	}

	/* Exactly one byte must have been written. */
	if (written != 1) {
		PERROR("write poll pipe");
	}

	return written;
}
120
121 /*
122 * Stop all threads by closing the thread quit pipe.
123 */
124 static
125 void stop_threads(void)
126 {
127 int ret;
128
129 /* Stopping all threads */
130 DBG("Terminating all live threads");
131 ret = notify_thread_pipe(live_thread_quit_pipe[1]);
132 if (ret < 0) {
133 ERR("write error on thread quit pipe");
134 }
135
136 /* Dispatch thread */
137 CMM_STORE_SHARED(live_dispatch_thread_exit, 1);
138 futex_nto1_wake(&viewer_cmd_queue.futex);
139 }
140
141 /*
142 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
143 */
144 static
145 int create_thread_poll_set(struct lttng_poll_event *events, int size)
146 {
147 int ret;
148
149 if (events == NULL || size == 0) {
150 ret = -1;
151 goto error;
152 }
153
154 ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
155 if (ret < 0) {
156 goto error;
157 }
158
159 /* Add quit pipe */
160 ret = lttng_poll_add(events, live_thread_quit_pipe[0], LPOLLIN);
161 if (ret < 0) {
162 goto error;
163 }
164
165 return 0;
166
167 error:
168 return ret;
169 }
170
171 /*
172 * Check if the thread quit pipe was triggered.
173 *
174 * Return 1 if it was triggered else 0;
175 */
176 static
177 int check_thread_quit_pipe(int fd, uint32_t events)
178 {
179 if (fd == live_thread_quit_pipe[0] && (events & LPOLLIN)) {
180 return 1;
181 }
182
183 return 0;
184 }
185
186 /*
187 * Create and init socket from uri.
188 */
189 static
190 struct lttcomm_sock *init_socket(struct lttng_uri *uri)
191 {
192 int ret;
193 struct lttcomm_sock *sock = NULL;
194
195 sock = lttcomm_alloc_sock_from_uri(uri);
196 if (sock == NULL) {
197 ERR("Allocating socket");
198 goto error;
199 }
200
201 ret = lttcomm_create_sock(sock);
202 if (ret < 0) {
203 goto error;
204 }
205 DBG("Listening on sock %d for live", sock->fd);
206
207 ret = sock->ops->bind(sock);
208 if (ret < 0) {
209 goto error;
210 }
211
212 ret = sock->ops->listen(sock, -1);
213 if (ret < 0) {
214 goto error;
215
216 }
217
218 return sock;
219
220 error:
221 if (sock) {
222 lttcomm_destroy_sock(sock);
223 }
224 return NULL;
225 }
226
227 /*
228 * This thread manages the listening for new connections on the network
229 */
230 static
231 void *thread_listener(void *data)
232 {
233 int i, ret, pollfd, err = -1;
234 int val = 1;
235 uint32_t revents, nb_fd;
236 struct lttng_poll_event events;
237 struct lttcomm_sock *live_control_sock;
238
239 DBG("[thread] Relay live listener started");
240
241 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);
242
243 health_code_update();
244
245 live_control_sock = init_socket(live_uri);
246 if (!live_control_sock) {
247 goto error_sock_control;
248 }
249
250 /*
251 * Pass 3 as size here for the thread quit pipe, control and data socket.
252 */
253 ret = create_thread_poll_set(&events, 2);
254 if (ret < 0) {
255 goto error_create_poll;
256 }
257
258 /* Add the control socket */
259 ret = lttng_poll_add(&events, live_control_sock->fd, LPOLLIN | LPOLLRDHUP);
260 if (ret < 0) {
261 goto error_poll_add;
262 }
263
264 while (1) {
265 health_code_update();
266
267 DBG("Listener accepting live viewers connections");
268
269 restart:
270 health_poll_entry();
271 ret = lttng_poll_wait(&events, -1);
272 health_poll_exit();
273 if (ret < 0) {
274 /*
275 * Restart interrupted system call.
276 */
277 if (errno == EINTR) {
278 goto restart;
279 }
280 goto error;
281 }
282 nb_fd = ret;
283
284 DBG("Relay new viewer connection received");
285 for (i = 0; i < nb_fd; i++) {
286 health_code_update();
287
288 /* Fetch once the poll data */
289 revents = LTTNG_POLL_GETEV(&events, i);
290 pollfd = LTTNG_POLL_GETFD(&events, i);
291
292 /* Thread quit pipe has been closed. Killing thread. */
293 ret = check_thread_quit_pipe(pollfd, revents);
294 if (ret) {
295 err = 0;
296 goto exit;
297 }
298
299 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
300 ERR("socket poll error");
301 goto error;
302 } else if (revents & LPOLLIN) {
303 /*
304 * Get allocated in this thread, enqueued to a global queue,
305 * dequeued and freed in the worker thread.
306 */
307 struct relay_command *relay_cmd;
308 struct lttcomm_sock *newsock;
309
310 relay_cmd = zmalloc(sizeof(*relay_cmd));
311 if (!relay_cmd) {
312 PERROR("relay command zmalloc");
313 goto error;
314 }
315
316 assert(pollfd == live_control_sock->fd);
317 newsock = live_control_sock->ops->accept(live_control_sock);
318 if (!newsock) {
319 PERROR("accepting control sock");
320 free(relay_cmd);
321 goto error;
322 }
323 DBG("Relay viewer connection accepted socket %d", newsock->fd);
324 ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val,
325 sizeof(int));
326 if (ret < 0) {
327 PERROR("setsockopt inet");
328 lttcomm_destroy_sock(newsock);
329 free(relay_cmd);
330 goto error;
331 }
332 relay_cmd->sock = newsock;
333
334 /*
335 * Lock free enqueue the request.
336 */
337 cds_wfq_enqueue(&viewer_cmd_queue.queue, &relay_cmd->node);
338
339 /*
340 * Wake the dispatch queue futex. Implicit memory
341 * barrier with the exchange in cds_wfq_enqueue.
342 */
343 futex_nto1_wake(&viewer_cmd_queue.futex);
344 }
345 }
346 }
347
348 exit:
349 error:
350 error_poll_add:
351 lttng_poll_clean(&events);
352 error_create_poll:
353 if (live_control_sock->fd >= 0) {
354 ret = live_control_sock->ops->close(live_control_sock);
355 if (ret) {
356 PERROR("close");
357 }
358 }
359 lttcomm_destroy_sock(live_control_sock);
360 error_sock_control:
361 if (err) {
362 health_error();
363 DBG("Live viewer listener thread exited with error");
364 }
365 health_unregister(health_relayd);
366 DBG("Live viewer listener thread cleanup complete");
367 stop_threads();
368 return NULL;
369 }
370
371 /*
372 * This thread manages the dispatching of the requests to worker threads
373 */
374 static
375 void *thread_dispatcher(void *data)
376 {
377 int ret, err = -1;
378 struct cds_wfq_node *node;
379 struct relay_command *relay_cmd = NULL;
380
381 DBG("[thread] Live viewer relay dispatcher started");
382
383 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);
384
385 health_code_update();
386
387 while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
388 health_code_update();
389
390 /* Atomically prepare the queue futex */
391 futex_nto1_prepare(&viewer_cmd_queue.futex);
392
393 do {
394 health_code_update();
395
396 /* Dequeue commands */
397 node = cds_wfq_dequeue_blocking(&viewer_cmd_queue.queue);
398 if (node == NULL) {
399 DBG("Woken up but nothing in the live-viewer "
400 "relay command queue");
401 /* Continue thread execution */
402 break;
403 }
404
405 relay_cmd = caa_container_of(node, struct relay_command, node);
406 DBG("Dispatching viewer request waiting on sock %d",
407 relay_cmd->sock->fd);
408
409 /*
410 * Inform worker thread of the new request. This call is blocking
411 * so we can be assured that the data will be read at some point in
412 * time or wait to the end of the world :)
413 */
414 do {
415 ret = write(live_relay_cmd_pipe[1], relay_cmd,
416 sizeof(*relay_cmd));
417 } while (ret < 0 && errno == EINTR);
418 free(relay_cmd);
419 if (ret < 0 || ret != sizeof(struct relay_command)) {
420 PERROR("write cmd pipe");
421 goto error;
422 }
423 } while (node != NULL);
424
425 /* Futex wait on queue. Blocking call on futex() */
426 health_poll_entry();
427 futex_nto1_wait(&viewer_cmd_queue.futex);
428 health_poll_exit();
429 }
430
431 /* Normal exit, no error */
432 err = 0;
433
434 error:
435 if (err) {
436 health_error();
437 ERR("Health error occurred in %s", __func__);
438 }
439 health_unregister(health_relayd);
440 DBG("Live viewer dispatch thread dying");
441 stop_threads();
442 return NULL;
443 }
444
445 /*
446 * Establish connection with the viewer and check the versions.
447 *
448 * Return 0 on success or else negative value.
449 */
450 static
451 int viewer_connect(struct relay_command *cmd)
452 {
453 int ret;
454 struct lttng_viewer_connect reply, msg;
455
456 assert(cmd);
457
458 cmd->version_check_done = 1;
459
460 health_code_update();
461
462 /* Get version from the other side. */
463 ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0);
464 if (ret < 0 || ret != sizeof(msg)) {
465 if (ret == 0) {
466 /* Orderly shutdown. Not necessary to print an error. */
467 DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
468 } else {
469 ERR("Relay failed to receive the version values.");
470 }
471 ret = -1;
472 goto end;
473 }
474
475 health_code_update();
476
477 reply.major = RELAYD_VERSION_COMM_MAJOR;
478 reply.minor = RELAYD_VERSION_COMM_MINOR;
479
480 /* Major versions must be the same */
481 if (reply.major != be32toh(msg.major)) {
482 DBG("Incompatible major versions (%u vs %u)", reply.major,
483 be32toh(msg.major));
484 ret = 0;
485 goto end;
486 }
487
488 cmd->major = reply.major;
489 /* We adapt to the lowest compatible version */
490 if (reply.minor <= be32toh(msg.minor)) {
491 cmd->minor = reply.minor;
492 } else {
493 cmd->minor = be32toh(msg.minor);
494 }
495
496 if (be32toh(msg.type) == VIEWER_CLIENT_COMMAND) {
497 cmd->type = RELAY_VIEWER_COMMAND;
498 } else if (be32toh(msg.type) == VIEWER_CLIENT_NOTIFICATION) {
499 cmd->type = RELAY_VIEWER_NOTIFICATION;
500 } else {
501 ERR("Unknown connection type : %u", be32toh(msg.type));
502 ret = -1;
503 goto end;
504 }
505
506 reply.major = htobe32(reply.major);
507 reply.minor = htobe32(reply.minor);
508 if (cmd->type == RELAY_VIEWER_COMMAND) {
509 reply.viewer_session_id = htobe64(++last_relay_viewer_session_id);
510 }
511
512 health_code_update();
513
514 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
515 sizeof(struct lttng_viewer_connect), 0);
516 if (ret < 0) {
517 ERR("Relay sending version");
518 }
519
520 health_code_update();
521
522 DBG("Version check done using protocol %u.%u", cmd->major, cmd->minor);
523 ret = 0;
524
525 end:
526 return ret;
527 }
528
529 /*
530 * Send the viewer the list of current sessions.
531 *
532 * Return 0 on success or else a negative value.
533 */
534 static
535 int viewer_list_sessions(struct relay_command *cmd,
536 struct lttng_ht *sessions_ht)
537 {
538 int ret;
539 struct lttng_viewer_list_sessions session_list;
540 unsigned long count;
541 long approx_before, approx_after;
542 struct lttng_ht_node_ulong *node;
543 struct lttng_ht_iter iter;
544 struct lttng_viewer_session send_session;
545 struct relay_session *session;
546
547 DBG("List sessions received");
548
549 if (cmd->version_check_done == 0) {
550 ERR("Trying to list sessions before version check");
551 ret = -1;
552 goto end_no_session;
553 }
554
555 rcu_read_lock();
556 cds_lfht_count_nodes(sessions_ht->ht, &approx_before, &count, &approx_after);
557 session_list.sessions_count = htobe32(count);
558
559 health_code_update();
560
561 ret = cmd->sock->ops->sendmsg(cmd->sock, &session_list,
562 sizeof(session_list), 0);
563 if (ret < 0) {
564 ERR("Relay sending sessions list");
565 goto end_unlock;
566 }
567
568 health_code_update();
569
570 cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, node, node) {
571 health_code_update();
572
573 node = lttng_ht_iter_get_node_ulong(&iter);
574 if (!node) {
575 goto end_unlock;
576 }
577 session = caa_container_of(node, struct relay_session, session_n);
578
579 strncpy(send_session.session_name, session->session_name,
580 sizeof(send_session.session_name));
581 strncpy(send_session.hostname, session->hostname,
582 sizeof(send_session.hostname));
583 send_session.id = htobe64(session->id);
584 send_session.live_timer = htobe32(session->live_timer);
585 send_session.clients = htobe32(session->viewer_attached);
586 send_session.streams = htobe32(session->stream_count);
587
588 health_code_update();
589
590 ret = cmd->sock->ops->sendmsg(cmd->sock, &send_session,
591 sizeof(send_session), 0);
592 if (ret < 0) {
593 ERR("Relay sending session info");
594 goto end_unlock;
595 }
596 }
597 health_code_update();
598
599 rcu_read_unlock();
600 ret = 0;
601 goto end;
602
603 end_unlock:
604 rcu_read_unlock();
605
606 end:
607 end_no_session:
608 return ret;
609 }
610
611 /*
612 * Allocate and init a new viewer_stream.
613 *
614 * Copies the values from the stream passed in parameter and insert the new
615 * stream in the viewer_streams_ht.
616 *
617 * MUST be called with rcu_read_lock held.
618 *
619 * Returns 0 on success or a negative value on error.
620 */
621 static
622 int init_viewer_stream(struct relay_stream *stream)
623 {
624 int ret;
625 struct relay_viewer_stream *viewer_stream;
626
627 assert(stream);
628
629 viewer_stream = zmalloc(sizeof(*viewer_stream));
630 if (!viewer_stream) {
631 PERROR("relay viewer stream zmalloc");
632 ret = -1;
633 goto error;
634 }
635
636 viewer_stream->read_fd = -1;
637 viewer_stream->index_read_fd = -1;
638 viewer_stream->session_id = stream->session->id;
639 viewer_stream->stream_handle = stream->stream_handle;
640 viewer_stream->path_name = strndup(stream->path_name,
641 LTTNG_VIEWER_PATH_MAX);
642 viewer_stream->channel_name = strndup(stream->channel_name,
643 LTTNG_VIEWER_NAME_MAX);
644 viewer_stream->total_index_received = stream->total_index_received;
645 viewer_stream->tracefile_size = stream->tracefile_size;
646 viewer_stream->tracefile_count = stream->tracefile_count;
647 viewer_stream->metadata_flag = stream->metadata_flag;
648
649 /*
650 * This is to avoid a race between the initialization of this object and
651 * the close of the given stream. If the stream is unable to find this
652 * viewer stream when closing, this copy will at least take the latest
653 * value.
654 */
655 viewer_stream->total_index_received = stream->total_index_received;
656
657 /*
658 * The deletion of this ctf_trace object is only done in a call RCU of the
659 * relay stream making it valid as long as we have the read side lock.
660 */
661 viewer_stream->ctf_trace = stream->ctf_trace;
662 uatomic_inc(&viewer_stream->ctf_trace->refcount);
663
664 lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle);
665 lttng_ht_add_unique_u64(viewer_streams_ht, &viewer_stream->stream_n);
666
667 ret = 0;
668
669 error:
670 return ret;
671 }
672
673 /*
674 * Send the viewer the list of current sessions.
675 */
676 static
677 int viewer_attach_session(struct relay_command *cmd,
678 struct lttng_ht *sessions_ht)
679 {
680 int ret, send_streams = 0, nb_streams = 0;
681 struct lttng_viewer_attach_session_request request;
682 struct lttng_viewer_attach_session_response response;
683 struct lttng_viewer_stream send_stream;
684 struct relay_stream *stream;
685 struct relay_viewer_stream *viewer_stream;
686 struct lttng_ht_node_ulong *node;
687 struct lttng_ht_node_u64 *node64;
688 struct lttng_ht_iter iter;
689 struct relay_session *session;
690
691 assert(cmd);
692 assert(sessions_ht);
693
694 DBG("Attach session received");
695
696 if (cmd->version_check_done == 0) {
697 ERR("Trying to attach session before version check");
698 ret = -1;
699 goto end_no_session;
700 }
701
702 health_code_update();
703
704 ret = cmd->sock->ops->recvmsg(cmd->sock, &request, sizeof(request), 0);
705 if (ret < 0 || ret != sizeof(request)) {
706 if (ret == 0) {
707 /* Orderly shutdown. Not necessary to print an error. */
708 DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
709 } else {
710 ERR("Relay failed to receive the attach parameters.");
711 }
712 ret = -1;
713 goto error;
714 }
715
716 health_code_update();
717
718 rcu_read_lock();
719 lttng_ht_lookup(sessions_ht,
720 (void *)((unsigned long) be64toh(request.session_id)), &iter);
721 node = lttng_ht_iter_get_node_ulong(&iter);
722 if (node == NULL) {
723 DBG("Relay session %" PRIu64 " not found",
724 be64toh(request.session_id));
725 response.status = htobe32(VIEWER_ATTACH_UNK);
726 goto send_reply;
727 }
728
729 session = caa_container_of(node, struct relay_session, session_n);
730 if (cmd->session_id == session->id) {
731 /* Same viewer already attached, just send the stream list. */
732 send_streams = 1;
733 response.status = htobe32(VIEWER_ATTACH_OK);
734 } else if (session->viewer_attached != 0) {
735 DBG("Already a viewer attached");
736 response.status = htobe32(VIEWER_ATTACH_ALREADY);
737 goto send_reply;
738 } else if (session->live_timer == 0) {
739 DBG("Not live session");
740 response.status = htobe32(VIEWER_ATTACH_NOT_LIVE);
741 goto send_reply;
742 } else {
743 session->viewer_attached++;
744 send_streams = 1;
745 response.status = htobe32(VIEWER_ATTACH_OK);
746 cmd->session_id = session->id;
747 cmd->session = session;
748 }
749
750 switch (be32toh(request.seek)) {
751 case VIEWER_SEEK_BEGINNING:
752 /* Default behaviour. */
753 break;
754 case VIEWER_SEEK_LAST:
755 /* TODO */
756 break;
757 default:
758 ERR("Wrong seek parameter");
759 response.status = htobe32(VIEWER_ATTACH_SEEK_ERR);
760 send_streams = 0;
761 goto send_reply;
762 }
763
764 if (send_streams) {
765 /* We should only be there if we have a session to attach to. */
766 assert(session);
767
768 /*
769 * Fill the viewer_streams_ht to count the number of streams
770 * ready to be sent and avoid concurrency issues on the
771 * relay_streams_ht and don't rely on a total session stream count.
772 */
773 cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, node, node) {
774 struct relay_viewer_stream *vstream;
775
776 health_code_update();
777
778 node = lttng_ht_iter_get_node_ulong(&iter);
779 if (!node) {
780 continue;
781 }
782 stream = caa_container_of(node, struct relay_stream, stream_n);
783 if (stream->session != cmd->session) {
784 continue;
785 }
786
787 /*
788 * Don't send streams with no ctf_trace, they are not ready to be
789 * read.
790 */
791 if (!stream->ctf_trace) {
792 continue;
793 }
794
795 vstream = live_find_viewer_stream_by_id(stream->stream_handle);
796 if (!vstream) {
797 ret = init_viewer_stream(stream);
798 if (ret < 0) {
799 goto end_unlock;
800 }
801 }
802 nb_streams++;
803 }
804 response.streams_count = htobe32(nb_streams);
805 }
806
807 send_reply:
808 health_code_update();
809 ret = cmd->sock->ops->sendmsg(cmd->sock, &response, sizeof(response), 0);
810 if (ret < 0) {
811 ERR("Relay sending viewer attach response");
812 goto end_unlock;
813 }
814 health_code_update();
815
816 /*
817 * Unknown or busy session, just return gracefully, the viewer knows what
818 * is happening.
819 */
820 if (!send_streams) {
821 ret = 0;
822 goto end_unlock;
823 }
824
825 /* We should only be there if we have a session to attach to. */
826 assert(session);
827 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
828 health_code_update();
829
830 node64 = lttng_ht_iter_get_node_u64(&iter);
831 if (!node64) {
832 continue;
833 }
834 viewer_stream = caa_container_of(node64, struct relay_viewer_stream,
835 stream_n);
836 if (viewer_stream->session_id != cmd->session->id) {
837 continue;
838 }
839
840 send_stream.id = htobe64(viewer_stream->stream_handle);
841 send_stream.ctf_trace_id = htobe64(viewer_stream->ctf_trace->id);
842 send_stream.metadata_flag = htobe32(viewer_stream->metadata_flag);
843 strncpy(send_stream.path_name, viewer_stream->path_name,
844 sizeof(send_stream.path_name));
845 strncpy(send_stream.channel_name, viewer_stream->channel_name,
846 sizeof(send_stream.channel_name));
847
848 ret = cmd->sock->ops->sendmsg(cmd->sock, &send_stream,
849 sizeof(send_stream), 0);
850 if (ret < 0) {
851 ERR("Relay sending stream %" PRIu64, viewer_stream->stream_handle);
852 goto end_unlock;
853 }
854 DBG("Sent stream %" PRIu64 " to viewer", viewer_stream->stream_handle);
855 }
856 ret = 0;
857
858 end_unlock:
859 rcu_read_unlock();
860 end_no_session:
861 error:
862 return ret;
863 }
864
865 /*
866 * Open index file using a given viewer stream.
867 *
868 * Return 0 on success or else a negative value.
869 */
870 static int open_index(struct relay_viewer_stream *stream)
871 {
872 int ret;
873 char fullpath[PATH_MAX];
874 struct lttng_packet_index_file_hdr hdr;
875
876 if (stream->tracefile_size > 0) {
877 /* For now we don't support on-disk ring buffer. */
878 ret = -1;
879 goto end;
880 } else {
881 ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR
882 "/%s" DEFAULT_INDEX_FILE_SUFFIX,
883 stream->path_name, stream->channel_name);
884 if (ret < 0) {
885 PERROR("snprintf index path");
886 goto error;
887 }
888 }
889
890 DBG("Opening index file %s in read only", fullpath);
891 ret = open(fullpath, O_RDONLY);
892 if (ret < 0) {
893 if (errno == ENOENT) {
894 ret = ENOENT;
895 goto error;
896 } else {
897 PERROR("opening index in read-only");
898 }
899 goto error;
900 }
901 stream->index_read_fd = ret;
902 DBG("Opening index file %s in read only, (fd: %d)", fullpath, ret);
903
904 do {
905 health_code_update();
906 ret = read(stream->index_read_fd, &hdr, sizeof(hdr));
907 } while (ret < 0 && errno == EINTR);
908 if (ret < 0) {
909 PERROR("Reading index header");
910 goto error;
911 }
912 if (strncmp(hdr.magic, INDEX_MAGIC, sizeof(hdr.magic)) != 0) {
913 ERR("Invalid header magic");
914 ret = -1;
915 goto error;
916 }
917 if (be32toh(hdr.index_major) != INDEX_MAJOR ||
918 be32toh(hdr.index_minor) != INDEX_MINOR) {
919 ERR("Invalid header version");
920 ret = -1;
921 goto error;
922 }
923 ret = 0;
924
925 error:
926 end:
927 return ret;
928 }
929
930 /*
931 * Get viewer stream from stream id.
932 *
933 * RCU read side lock MUST be acquired.
934 */
935 struct relay_viewer_stream *live_find_viewer_stream_by_id(uint64_t stream_id)
936 {
937 struct lttng_ht_node_u64 *node;
938 struct lttng_ht_iter iter;
939 struct relay_viewer_stream *stream = NULL;
940
941 lttng_ht_lookup(viewer_streams_ht, &stream_id, &iter);
942 node = lttng_ht_iter_get_node_u64(&iter);
943 if (node == NULL) {
944 DBG("Relay viewer stream %" PRIu64 " not found", stream_id);
945 goto end;
946 }
947 stream = caa_container_of(node, struct relay_viewer_stream, stream_n);
948
949 end:
950 return stream;
951 }
952
953 /*
954 * Send the next index for a stream.
955 *
956 * Return 0 on success or else a negative value.
957 */
958 static
959 int viewer_get_next_index(struct relay_command *cmd,
960 struct lttng_ht *sessions_ht)
961 {
962 int ret;
963 struct lttng_viewer_get_next_index request_index;
964 struct lttng_viewer_index viewer_index;
965 struct lttng_packet_index packet_index;
966 struct relay_viewer_stream *vstream;
967 struct relay_stream *rstream;
968
969 assert(cmd);
970 assert(sessions_ht);
971
972 DBG("Viewer get next index");
973
974 if (cmd->version_check_done == 0) {
975 ERR("Trying to request index before version check");
976 ret = -1;
977 goto end_no_session;
978 }
979
980 health_code_update();
981 ret = cmd->sock->ops->recvmsg(cmd->sock, &request_index,
982 sizeof(request_index), 0);
983 if (ret < 0 || ret != sizeof(request_index)) {
984 ret = -1;
985 ERR("Relay didn't receive the whole packet");
986 goto end;
987 }
988 health_code_update();
989
990 rcu_read_lock();
991 vstream = live_find_viewer_stream_by_id(be64toh(request_index.stream_id));
992 if (!vstream) {
993 ret = -1;
994 goto end_unlock;
995 }
996
997 memset(&viewer_index, 0, sizeof(viewer_index));
998
999 /*
1000 * The viewer should not ask for index on metadata stream.
1001 */
1002 if (vstream->metadata_flag) {
1003 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1004 goto send_reply;
1005 }
1006
1007 /* First time, we open the index file */
1008 if (vstream->index_read_fd < 0) {
1009 ret = open_index(vstream);
1010 if (ret == ENOENT) {
1011 /*
1012 * The index is created only when the first data packet arrives, it
1013 * might not be ready at the beginning of the session
1014 */
1015 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1016 goto send_reply;
1017 } else if (ret < 0) {
1018 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1019 goto send_reply;
1020 }
1021 }
1022
1023 rstream = relay_stream_find_by_id(vstream->stream_handle);
1024 if (rstream) {
1025 if (rstream->beacon_ts_end != -1ULL &&
1026 vstream->last_sent_index == rstream->total_index_received) {
1027 viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE);
1028 viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end);
1029 goto send_reply;
1030 }
1031
1032 if (rstream->total_index_received <= vstream->last_sent_index) {
1033 /* No new index to send, retry later. */
1034 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1035 goto send_reply;
1036 }
1037 } else if (!rstream &&
1038 vstream->total_index_received == vstream->last_sent_index) {
1039 /* Last index sent and stream closed */
1040 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1041 goto send_reply;
1042 }
1043
1044 if (!vstream->ctf_trace->metadata_received ||
1045 vstream->ctf_trace->metadata_received >
1046 vstream->ctf_trace->metadata_sent) {
1047 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1048 }
1049
1050 do {
1051 health_code_update();
1052 ret = read(vstream->index_read_fd, &packet_index,
1053 sizeof(packet_index));
1054 } while (ret < 0 && errno == EINTR);
1055 if (ret < sizeof(packet_index)) {
1056 PERROR("Relay reading index file");
1057 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1058 } else {
1059 viewer_index.status = htobe32(VIEWER_INDEX_OK);
1060 vstream->last_sent_index++;
1061 }
1062
1063 /*
1064 * Indexes are stored in big endian, no need to switch before sending.
1065 */
1066 viewer_index.offset = packet_index.offset;
1067 viewer_index.packet_size = packet_index.packet_size;
1068 viewer_index.content_size = packet_index.content_size;
1069 viewer_index.timestamp_begin = packet_index.timestamp_begin;
1070 viewer_index.timestamp_end = packet_index.timestamp_end;
1071 viewer_index.events_discarded = packet_index.events_discarded;
1072 viewer_index.stream_id = packet_index.stream_id;
1073
1074 send_reply:
1075 viewer_index.flags = htobe32(viewer_index.flags);
1076 health_code_update();
1077 ret = cmd->sock->ops->sendmsg(cmd->sock, &viewer_index,
1078 sizeof(viewer_index), 0);
1079 if (ret < 0) {
1080 ERR("Relay index to viewer");
1081 goto end_unlock;
1082 }
1083 health_code_update();
1084
1085 DBG("Index %" PRIu64 "for stream %" PRIu64 "sent",
1086 vstream->last_sent_index, vstream->stream_handle);
1087
1088 end_unlock:
1089 rcu_read_unlock();
1090
1091 end_no_session:
1092 end:
1093 return ret;
1094 }
1095
1096 /*
1097 * Send the next index for a stream
1098 *
1099 * Return 0 on success or else a negative value.
1100 */
1101 static
1102 int viewer_get_packet(struct relay_command *cmd)
1103 {
1104 int ret, send_data = 0;
1105 char *data = NULL;
1106 uint32_t len = 0;
1107 ssize_t read_len;
1108 struct lttng_viewer_get_packet get_packet_info;
1109 struct lttng_viewer_trace_packet reply;
1110 struct relay_viewer_stream *stream;
1111
1112 assert(cmd);
1113
1114 DBG2("Relay get data packet");
1115
1116 if (cmd->version_check_done == 0) {
1117 ERR("Trying to get packet before version check");
1118 ret = -1;
1119 goto end;
1120 }
1121
1122 health_code_update();
1123 ret = cmd->sock->ops->recvmsg(cmd->sock, &get_packet_info,
1124 sizeof(get_packet_info), 0);
1125 if (ret < 0 || ret != sizeof(get_packet_info)) {
1126 ret = -1;
1127 ERR("Relay didn't receive the whole packet");
1128 goto end;
1129 }
1130 health_code_update();
1131
1132 /* From this point on, the error label can be reached. */
1133 memset(&reply, 0, sizeof(reply));
1134
1135 rcu_read_lock();
1136 stream = live_find_viewer_stream_by_id(be64toh(get_packet_info.stream_id));
1137 if (!stream) {
1138 goto error;
1139 }
1140 assert(stream->ctf_trace);
1141
1142 /*
1143 * First time we read this stream, we need open the tracefile, we should
1144 * only arrive here if an index has already been sent to the viewer, so the
1145 * tracefile must exist, if it does not it is a fatal error.
1146 */
1147 if (stream->read_fd < 0) {
1148 char fullpath[PATH_MAX];
1149
1150 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1151 stream->channel_name);
1152 if (ret < 0) {
1153 goto error;
1154 }
1155 ret = open(fullpath, O_RDONLY);
1156 if (ret < 0) {
1157 PERROR("Relay opening trace file");
1158 goto error;
1159 }
1160 stream->read_fd = ret;
1161 }
1162
1163 if (!stream->ctf_trace->metadata_received ||
1164 stream->ctf_trace->metadata_received >
1165 stream->ctf_trace->metadata_sent) {
1166 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1167 reply.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1168 goto send_reply;
1169 }
1170
1171 len = be32toh(get_packet_info.len);
1172 data = zmalloc(len);
1173 if (!data) {
1174 PERROR("relay data zmalloc");
1175 goto error;
1176 }
1177
1178 ret = lseek(stream->read_fd, be64toh(get_packet_info.offset), SEEK_SET);
1179 if (ret < 0) {
1180 PERROR("lseek");
1181 goto error;
1182 }
1183 read_len = read(stream->read_fd, data, len);
1184 if (read_len < (ssize_t) len) {
1185 PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64,
1186 stream->read_fd, be64toh(get_packet_info.offset));
1187 goto error;
1188 }
1189 reply.status = htobe32(VIEWER_GET_PACKET_OK);
1190 reply.len = htobe32(len);
1191 send_data = 1;
1192 goto send_reply;
1193
1194 error:
1195 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1196
1197 send_reply:
1198 reply.flags = htobe32(reply.flags);
1199
1200 health_code_update();
1201 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
1202 if (ret < 0) {
1203 ERR("Relay data header to viewer");
1204 goto end_unlock;
1205 }
1206 health_code_update();
1207
1208 if (send_data) {
1209 health_code_update();
1210 ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
1211 if (ret < 0) {
1212 ERR("Relay send data to viewer");
1213 goto end_unlock;
1214 }
1215 health_code_update();
1216 }
1217
1218 DBG("Sent %u bytes for stream %" PRIu64, len,
1219 be64toh(get_packet_info.stream_id));
1220
1221 end_unlock:
1222 free(data);
1223 rcu_read_unlock();
1224
1225 end:
1226 return ret;
1227 }
1228
1229 /*
1230 * Send the session's metadata
1231 *
1232 * Return 0 on success else a negative value.
1233 */
1234 static
1235 int viewer_get_metadata(struct relay_command *cmd)
1236 {
1237 int ret = 0;
1238 ssize_t read_len;
1239 uint64_t len = 0;
1240 char *data = NULL;
1241 struct lttng_viewer_get_metadata request;
1242 struct lttng_viewer_metadata_packet reply;
1243 struct relay_viewer_stream *stream;
1244
1245 assert(cmd);
1246
1247 DBG("Relay get metadata");
1248
1249 if (cmd->version_check_done == 0) {
1250 ERR("Trying to get metadata before version check");
1251 ret = -1;
1252 goto end;
1253 }
1254
1255 health_code_update();
1256 ret = cmd->sock->ops->recvmsg(cmd->sock, &request,
1257 sizeof(request), 0);
1258 if (ret < 0 || ret != sizeof(request)) {
1259 ret = -1;
1260 ERR("Relay didn't receive the whole packet");
1261 goto end;
1262 }
1263 health_code_update();
1264
1265 rcu_read_lock();
1266 stream = live_find_viewer_stream_by_id(be64toh(request.stream_id));
1267 if (!stream || !stream->metadata_flag) {
1268 ERR("Invalid metadata stream");
1269 goto error;
1270 }
1271 assert(stream->ctf_trace);
1272 assert(stream->ctf_trace->metadata_sent <=
1273 stream->ctf_trace->metadata_received);
1274
1275 len = stream->ctf_trace->metadata_received -
1276 stream->ctf_trace->metadata_sent;
1277 if (len == 0) {
1278 reply.status = htobe32(VIEWER_NO_NEW_METADATA);
1279 goto send_reply;
1280 }
1281
1282 /* first time, we open the metadata file */
1283 if (stream->read_fd < 0) {
1284 char fullpath[PATH_MAX];
1285
1286 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1287 stream->channel_name);
1288 if (ret < 0) {
1289 goto error;
1290 }
1291 ret = open(fullpath, O_RDONLY);
1292 if (ret < 0) {
1293 PERROR("Relay opening metadata file");
1294 goto error;
1295 }
1296 stream->read_fd = ret;
1297 }
1298
1299 reply.len = htobe64(len);
1300 data = zmalloc(len);
1301 if (!data) {
1302 PERROR("viewer metadata zmalloc");
1303 goto error;
1304 }
1305
1306 read_len = read(stream->read_fd, data, len);
1307 if (read_len < (ssize_t) len) {
1308 PERROR("Relay reading metadata file");
1309 goto error;
1310 }
1311 stream->ctf_trace->metadata_sent += read_len;
1312 reply.status = htobe32(VIEWER_METADATA_OK);
1313 goto send_reply;
1314
1315 error:
1316 reply.status = htobe32(VIEWER_METADATA_ERR);
1317
1318 send_reply:
1319 health_code_update();
1320 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
1321 if (ret < 0) {
1322 ERR("Relay data header to viewer");
1323 goto end_unlock;
1324 }
1325 health_code_update();
1326
1327 if (len > 0) {
1328 ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
1329 if (ret < 0) {
1330 ERR("Relay send data to viewer");
1331 goto end_unlock;
1332 }
1333 }
1334
1335 DBG("Sent %" PRIu64 " bytes of metadata for stream %" PRIu64, len,
1336 be64toh(request.stream_id));
1337
1338 DBG("Metadata sent");
1339
1340 end_unlock:
1341 free(data);
1342 rcu_read_unlock();
1343 end:
1344 return ret;
1345 }
1346
1347 /*
1348 * live_relay_unknown_command: send -1 if received unknown command
1349 */
1350 static
1351 void live_relay_unknown_command(struct relay_command *cmd)
1352 {
1353 struct lttcomm_relayd_generic_reply reply;
1354 int ret;
1355
1356 reply.ret_code = htobe32(LTTNG_ERR_UNK);
1357 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
1358 sizeof(struct lttcomm_relayd_generic_reply), 0);
1359 if (ret < 0) {
1360 ERR("Relay sending unknown command");
1361 }
1362 }
1363
1364 /*
1365 * Process the commands received on the control socket
1366 */
1367 static
1368 int process_control(struct lttng_viewer_cmd *recv_hdr,
1369 struct relay_command *cmd, struct lttng_ht *sessions_ht)
1370 {
1371 int ret = 0;
1372
1373 switch (be32toh(recv_hdr->cmd)) {
1374 case VIEWER_CONNECT:
1375 ret = viewer_connect(cmd);
1376 break;
1377 case VIEWER_LIST_SESSIONS:
1378 ret = viewer_list_sessions(cmd, sessions_ht);
1379 break;
1380 case VIEWER_ATTACH_SESSION:
1381 ret = viewer_attach_session(cmd, sessions_ht);
1382 break;
1383 case VIEWER_GET_NEXT_INDEX:
1384 ret = viewer_get_next_index(cmd, sessions_ht);
1385 break;
1386 case VIEWER_GET_PACKET:
1387 ret = viewer_get_packet(cmd);
1388 break;
1389 case VIEWER_GET_METADATA:
1390 ret = viewer_get_metadata(cmd);
1391 break;
1392 default:
1393 ERR("Received unknown viewer command (%u)", be32toh(recv_hdr->cmd));
1394 live_relay_unknown_command(cmd);
1395 ret = -1;
1396 goto end;
1397 }
1398
1399 end:
1400 return ret;
1401 }
1402
/*
 * Remove pollfd from the poll set, then close it. A close() failure is
 * logged and otherwise ignored.
 */
static
void cleanup_poll_connection(struct lttng_poll_event *events, int pollfd)
{
	assert(events);

	lttng_poll_del(events, pollfd);

	if (close(pollfd) < 0) {
		ERR("Closing pollfd %d", pollfd);
	}
}
1417
1418 /*
1419 * Create and add connection to the given hash table.
1420 *
1421 * Return poll add value or else -1 on error.
1422 */
1423 static
1424 int add_connection(int fd, struct lttng_poll_event *events,
1425 struct lttng_ht *relay_connections_ht)
1426 {
1427 int ret;
1428 struct relay_command *relay_connection;
1429
1430 assert(events);
1431 assert(relay_connections_ht);
1432
1433 relay_connection = zmalloc(sizeof(struct relay_command));
1434 if (relay_connection == NULL) {
1435 PERROR("Relay command zmalloc");
1436 goto error;
1437 }
1438
1439 do {
1440 health_code_update();
1441 ret = read(fd, relay_connection, sizeof(*relay_connection));
1442 } while (ret < 0 && errno == EINTR);
1443 if (ret < 0 || ret < sizeof(*relay_connection)) {
1444 PERROR("read relay cmd pipe");
1445 goto error_read;
1446 }
1447
1448 lttng_ht_node_init_ulong(&relay_connection->sock_n,
1449 (unsigned long) relay_connection->sock->fd);
1450 rcu_read_lock();
1451 lttng_ht_add_unique_ulong(relay_connections_ht,
1452 &relay_connection->sock_n);
1453 rcu_read_unlock();
1454
1455 return lttng_poll_add(events, relay_connection->sock->fd,
1456 LPOLLIN | LPOLLRDHUP);
1457
1458 error_read:
1459 free(relay_connection);
1460 error:
1461 return -1;
1462 }
1463
1464 static
1465 void deferred_free_connection(struct rcu_head *head)
1466 {
1467 struct relay_command *relay_connection =
1468 caa_container_of(head, struct relay_command, rcu_node);
1469
1470 if (relay_connection->session &&
1471 relay_connection->session->viewer_attached > 0) {
1472 relay_connection->session->viewer_attached--;
1473 }
1474 lttcomm_destroy_sock(relay_connection->sock);
1475 free(relay_connection);
1476 }
1477
1478 static
1479 void deferred_free_viewer_stream(struct rcu_head *head)
1480 {
1481 struct relay_viewer_stream *stream =
1482 caa_container_of(head, struct relay_viewer_stream, rcu_node);
1483
1484 if (stream->ctf_trace) {
1485 uatomic_dec(&stream->ctf_trace->refcount);
1486 assert(uatomic_read(&stream->ctf_trace->refcount) >= 0);
1487 if (uatomic_read(&stream->ctf_trace->refcount) == 0) {
1488 DBG("Freeing ctf_trace %" PRIu64, stream->ctf_trace->id);
1489 free(stream->ctf_trace);
1490 }
1491 }
1492
1493 free(stream->path_name);
1494 free(stream->channel_name);
1495 free(stream);
1496 }
1497
1498 static
1499 void viewer_del_streams(uint64_t session_id)
1500 {
1501 int ret;
1502 struct relay_viewer_stream *stream;
1503 struct lttng_ht_node_u64 *node;
1504 struct lttng_ht_iter iter;
1505
1506 rcu_read_lock();
1507 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
1508 health_code_update();
1509
1510 node = lttng_ht_iter_get_node_u64(&iter);
1511 if (!node) {
1512 continue;
1513 }
1514
1515 stream = caa_container_of(node, struct relay_viewer_stream, stream_n);
1516 if (stream->session_id != session_id) {
1517 continue;
1518 }
1519
1520 if (stream->read_fd > 0) {
1521 ret = close(stream->read_fd);
1522 if (ret < 0) {
1523 PERROR("close read_fd");
1524 }
1525 }
1526 if (stream->index_read_fd > 0) {
1527 ret = close(stream->index_read_fd);
1528 if (ret < 0) {
1529 PERROR("close index_read_fd");
1530 }
1531 }
1532 if (stream->metadata_flag && stream->ctf_trace) {
1533 stream->ctf_trace->metadata_sent = 0;
1534 }
1535 ret = lttng_ht_del(viewer_streams_ht, &iter);
1536 assert(!ret);
1537 call_rcu(&stream->rcu_node, deferred_free_viewer_stream);
1538 }
1539 rcu_read_unlock();
1540 }
1541
1542 /*
1543 * Delete and free a connection.
1544 *
1545 * RCU read side lock MUST be acquired.
1546 */
1547 static
1548 void del_connection(struct lttng_ht *relay_connections_ht,
1549 struct lttng_ht_iter *iter, struct relay_command *relay_connection)
1550 {
1551 int ret;
1552
1553 assert(relay_connections_ht);
1554 assert(iter);
1555 assert(relay_connection);
1556
1557 ret = lttng_ht_del(relay_connections_ht, iter);
1558 assert(!ret);
1559
1560 viewer_del_streams(relay_connection->session_id);
1561
1562 call_rcu(&relay_connection->rcu_node, deferred_free_connection);
1563 }
1564
1565 /*
1566 * This thread does the actual work
1567 */
1568 static
1569 void *thread_worker(void *data)
1570 {
1571 int ret, err = -1;
1572 uint32_t nb_fd;
1573 struct relay_command *relay_connection;
1574 struct lttng_poll_event events;
1575 struct lttng_ht *relay_connections_ht;
1576 struct lttng_ht_node_ulong *node;
1577 struct lttng_ht_iter iter;
1578 struct lttng_viewer_cmd recv_hdr;
1579 struct relay_local_data *relay_ctx = (struct relay_local_data *) data;
1580 struct lttng_ht *sessions_ht = relay_ctx->sessions_ht;
1581
1582 DBG("[thread] Live viewer relay worker started");
1583
1584 rcu_register_thread();
1585
1586 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);
1587
1588 /* table of connections indexed on socket */
1589 relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
1590 if (!relay_connections_ht) {
1591 goto relay_connections_ht_error;
1592 }
1593
1594 ret = create_thread_poll_set(&events, 2);
1595 if (ret < 0) {
1596 goto error_poll_create;
1597 }
1598
1599 ret = lttng_poll_add(&events, live_relay_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1600 if (ret < 0) {
1601 goto error;
1602 }
1603
1604 restart:
1605 while (1) {
1606 int i;
1607
1608 health_code_update();
1609
1610 /* Infinite blocking call, waiting for transmission */
1611 DBG3("Relayd live viewer worker thread polling...");
1612 health_poll_entry();
1613 ret = lttng_poll_wait(&events, -1);
1614 health_poll_exit();
1615 if (ret < 0) {
1616 /*
1617 * Restart interrupted system call.
1618 */
1619 if (errno == EINTR) {
1620 goto restart;
1621 }
1622 goto error;
1623 }
1624
1625 nb_fd = ret;
1626
1627 /*
1628 * Process control. The control connection is prioritised so we don't
1629 * starve it with high throughput tracing data on the data
1630 * connection.
1631 */
1632 for (i = 0; i < nb_fd; i++) {
1633 /* Fetch once the poll data */
1634 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1635 int pollfd = LTTNG_POLL_GETFD(&events, i);
1636
1637 health_code_update();
1638
1639 /* Thread quit pipe has been closed. Killing thread. */
1640 ret = check_thread_quit_pipe(pollfd, revents);
1641 if (ret) {
1642 err = 0;
1643 goto exit;
1644 }
1645
1646 /* Inspect the relay cmd pipe for new connection */
1647 if (pollfd == live_relay_cmd_pipe[0]) {
1648 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1649 ERR("Relay live pipe error");
1650 goto error;
1651 } else if (revents & LPOLLIN) {
1652 DBG("Relay live viewer command received");
1653 ret = add_connection(live_relay_cmd_pipe[0],
1654 &events, relay_connections_ht);
1655 if (ret < 0) {
1656 goto error;
1657 }
1658 }
1659 } else if (revents) {
1660 rcu_read_lock();
1661 lttng_ht_lookup(relay_connections_ht,
1662 (void *)((unsigned long) pollfd), &iter);
1663 node = lttng_ht_iter_get_node_ulong(&iter);
1664 if (node == NULL) {
1665 DBG2("Relay viewer sock %d not found", pollfd);
1666 rcu_read_unlock();
1667 goto error;
1668 }
1669 relay_connection = caa_container_of(node, struct relay_command,
1670 sock_n);
1671
1672 if (revents & (LPOLLERR)) {
1673 cleanup_poll_connection(&events, pollfd);
1674 del_connection(relay_connections_ht, &iter,
1675 relay_connection);
1676 } else if (revents & (LPOLLHUP | LPOLLRDHUP)) {
1677 DBG("Viewer socket %d hung up", pollfd);
1678 cleanup_poll_connection(&events, pollfd);
1679 del_connection(relay_connections_ht, &iter,
1680 relay_connection);
1681 } else if (revents & LPOLLIN) {
1682 ret = relay_connection->sock->ops->recvmsg(
1683 relay_connection->sock, &recv_hdr,
1684 sizeof(struct lttng_viewer_cmd),
1685 0);
1686 /* connection closed */
1687 if (ret <= 0) {
1688 cleanup_poll_connection(&events, pollfd);
1689 del_connection( relay_connections_ht, &iter,
1690 relay_connection);
1691 DBG("Viewer control connection closed with %d",
1692 pollfd);
1693 } else {
1694 if (relay_connection->session) {
1695 DBG2("Relay viewer worker receiving data for "
1696 "session: %" PRIu64,
1697 relay_connection->session->id);
1698 }
1699 ret = process_control(&recv_hdr, relay_connection,
1700 sessions_ht);
1701 if (ret < 0) {
1702 /* Clear the session on error. */
1703 cleanup_poll_connection(&events, pollfd);
1704 del_connection(relay_connections_ht, &iter,
1705 relay_connection);
1706 DBG("Viewer connection closed with %d", pollfd);
1707 }
1708 }
1709 }
1710 rcu_read_unlock();
1711 }
1712 }
1713 }
1714
1715 exit:
1716 error:
1717 lttng_poll_clean(&events);
1718
1719 /* empty the hash table and free the memory */
1720 rcu_read_lock();
1721 cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) {
1722 health_code_update();
1723
1724 node = lttng_ht_iter_get_node_ulong(&iter);
1725 if (!node) {
1726 continue;
1727 }
1728
1729 relay_connection = caa_container_of(node, struct relay_command,
1730 sock_n);
1731 del_connection(relay_connections_ht, &iter, relay_connection);
1732 }
1733 rcu_read_unlock();
1734 error_poll_create:
1735 lttng_ht_destroy(relay_connections_ht);
1736 relay_connections_ht_error:
1737 /* Close relay cmd pipes */
1738 utils_close_pipe(live_relay_cmd_pipe);
1739 if (err) {
1740 DBG("Viewer worker thread exited with error");
1741 }
1742 DBG("Viewer worker thread cleanup complete");
1743 if (err) {
1744 health_error();
1745 ERR("Health error occurred in %s", __func__);
1746 }
1747 health_unregister(health_relayd);
1748 stop_threads();
1749 rcu_unregister_thread();
1750 return NULL;
1751 }
1752
1753 /*
1754 * Create the relay command pipe to wake thread_manage_apps.
1755 * Closed in cleanup().
1756 */
1757 static int create_relay_cmd_pipe(void)
1758 {
1759 int ret;
1760
1761 ret = utils_create_pipe_cloexec(live_relay_cmd_pipe);
1762
1763 return ret;
1764 }
1765
1766 void live_stop_threads()
1767 {
1768 int ret;
1769 void *status;
1770
1771 stop_threads();
1772
1773 ret = pthread_join(live_listener_thread, &status);
1774 if (ret != 0) {
1775 PERROR("pthread_join live listener");
1776 goto error; /* join error, exit without cleanup */
1777 }
1778
1779 ret = pthread_join(live_worker_thread, &status);
1780 if (ret != 0) {
1781 PERROR("pthread_join live worker");
1782 goto error; /* join error, exit without cleanup */
1783 }
1784
1785 ret = pthread_join(live_dispatcher_thread, &status);
1786 if (ret != 0) {
1787 PERROR("pthread_join live dispatcher");
1788 goto error; /* join error, exit without cleanup */
1789 }
1790
1791 cleanup();
1792
1793 error:
1794 return;
1795 }
1796
1797 /*
1798 * main
1799 */
1800 int live_start_threads(struct lttng_uri *uri,
1801 struct relay_local_data *relay_ctx, int quit_pipe[2])
1802 {
1803 int ret = 0;
1804 void *status;
1805 int is_root;
1806
1807 assert(uri);
1808 live_uri = uri;
1809
1810 live_thread_quit_pipe[0] = quit_pipe[0];
1811 live_thread_quit_pipe[1] = quit_pipe[1];
1812
1813 /* Check if daemon is UID = 0 */
1814 is_root = !getuid();
1815
1816 if (!is_root) {
1817 if (live_uri->port < 1024) {
1818 ERR("Need to be root to use ports < 1024");
1819 ret = -1;
1820 goto exit;
1821 }
1822 }
1823
1824 /* Setup the thread apps communication pipe. */
1825 if ((ret = create_relay_cmd_pipe()) < 0) {
1826 goto exit;
1827 }
1828
1829 /* Init relay command queue. */
1830 cds_wfq_init(&viewer_cmd_queue.queue);
1831
1832 /* Set up max poll set size */
1833 lttng_poll_set_max_size();
1834
1835 /* Setup the dispatcher thread */
1836 ret = pthread_create(&live_dispatcher_thread, NULL,
1837 thread_dispatcher, (void *) NULL);
1838 if (ret != 0) {
1839 PERROR("pthread_create viewer dispatcher");
1840 goto exit_dispatcher;
1841 }
1842
1843 /* Setup the worker thread */
1844 ret = pthread_create(&live_worker_thread, NULL,
1845 thread_worker, relay_ctx);
1846 if (ret != 0) {
1847 PERROR("pthread_create viewer worker");
1848 goto exit_worker;
1849 }
1850
1851 /* Setup the listener thread */
1852 ret = pthread_create(&live_listener_thread, NULL,
1853 thread_listener, (void *) NULL);
1854 if (ret != 0) {
1855 PERROR("pthread_create viewer listener");
1856 goto exit_listener;
1857 }
1858
1859 ret = 0;
1860 goto end;
1861
1862 exit_listener:
1863 ret = pthread_join(live_listener_thread, &status);
1864 if (ret != 0) {
1865 PERROR("pthread_join live listener");
1866 goto error; /* join error, exit without cleanup */
1867 }
1868
1869 exit_worker:
1870 ret = pthread_join(live_worker_thread, &status);
1871 if (ret != 0) {
1872 PERROR("pthread_join live worker");
1873 goto error; /* join error, exit without cleanup */
1874 }
1875
1876 exit_dispatcher:
1877 ret = pthread_join(live_dispatcher_thread, &status);
1878 if (ret != 0) {
1879 PERROR("pthread_join live dispatcher");
1880 goto error; /* join error, exit without cleanup */
1881 }
1882
1883 exit:
1884 cleanup();
1885
1886 end:
1887 error:
1888 return ret;
1889 }
This page took 0.076993 seconds and 3 git commands to generate.