80c3da57ac63d2ee0fd69b5942e063c7aefa4f16
[lttng-tools.git] / src / bin / lttng-relayd / live.c
1 /*
2 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
3 * David Goulet <dgoulet@efficios.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19 #define _GNU_SOURCE
20 #include <getopt.h>
21 #include <grp.h>
22 #include <limits.h>
23 #include <pthread.h>
24 #include <signal.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/mman.h>
29 #include <sys/mount.h>
30 #include <sys/resource.h>
31 #include <sys/socket.h>
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <inttypes.h>
36 #include <urcu/futex.h>
37 #include <urcu/uatomic.h>
38 #include <unistd.h>
39 #include <fcntl.h>
40 #include <config.h>
41
42 #include <lttng/lttng.h>
43 #include <common/common.h>
44 #include <common/compat/poll.h>
45 #include <common/compat/socket.h>
46 #include <common/defaults.h>
47 #include <common/futex.h>
48 #include <common/sessiond-comm/sessiond-comm.h>
49 #include <common/sessiond-comm/inet.h>
50 #include <common/sessiond-comm/relayd.h>
51 #include <common/uri.h>
52 #include <common/utils.h>
53
54 #include "cmd.h"
55 #include "live.h"
56 #include "lttng-relayd.h"
57 #include "lttng-viewer.h"
58 #include "utils.h"
59 #include "health-relayd.h"
60 #include "testpoint.h"
61
/* URI the live listener binds to; passed to init_socket(), freed by cleanup(). */
static struct lttng_uri *live_uri;

/*
 * This pipe is used to inform the worker thread that a command is queued and
 * ready to be processed. thread_dispatcher() writes each dequeued command to
 * the write end ([1]).
 */
static int live_relay_cmd_pipe[2] = { -1, -1 };

/*
 * Shared between threads: set to 1 by stop_threads() and polled by
 * thread_dispatcher() to leave its main loop.
 */
static int live_dispatch_thread_exit;

static pthread_t live_listener_thread;
static pthread_t live_dispatcher_thread;
static pthread_t live_worker_thread;

/*
 * Relay command queue.
 *
 * thread_listener() enqueues accepted connections on this queue and
 * thread_dispatcher() dequeues them; the queue futex is used to wake the
 * dispatcher.
 */
static struct relay_cmd_queue viewer_cmd_queue;

/* Last viewer session id handed out; incremented by viewer_connect(). */
static uint64_t last_relay_viewer_session_id;
86
87 /*
88 * Cleanup the daemon
89 */
90 static
91 void cleanup(void)
92 {
93 DBG("Cleaning up");
94
95 free(live_uri);
96 }
97
/*
 * Notify a thread by writing a single byte to the write end of its pipe.
 *
 * Returns the result of lttng_write() converted to int. A result below 1 is
 * reported through PERROR() before being returned.
 */
static int notify_thread_pipe(int wpipe)
{
	const ssize_t written = lttng_write(wpipe, "!", 1);

	if (written < 1) {
		PERROR("write poll pipe");
	}

	return (int) written;
}
113
114 /*
115 * Stop all threads by closing the thread quit pipe.
116 */
117 static
118 void stop_threads(void)
119 {
120 int ret;
121
122 /* Stopping all threads */
123 DBG("Terminating all live threads");
124 ret = notify_thread_pipe(thread_quit_pipe[1]);
125 if (ret < 0) {
126 ERR("write error on thread quit pipe");
127 }
128
129 /* Dispatch thread */
130 CMM_STORE_SHARED(live_dispatch_thread_exit, 1);
131 futex_nto1_wake(&viewer_cmd_queue.futex);
132 }
133
134 /*
135 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
136 */
137 static
138 int create_thread_poll_set(struct lttng_poll_event *events, int size)
139 {
140 int ret;
141
142 if (events == NULL || size == 0) {
143 ret = -1;
144 goto error;
145 }
146
147 ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
148 if (ret < 0) {
149 goto error;
150 }
151
152 /* Add quit pipe */
153 ret = lttng_poll_add(events, thread_quit_pipe[0], LPOLLIN | LPOLLERR);
154 if (ret < 0) {
155 goto error;
156 }
157
158 return 0;
159
160 error:
161 return ret;
162 }
163
164 /*
165 * Check if the thread quit pipe was triggered.
166 *
167 * Return 1 if it was triggered else 0;
168 */
169 static
170 int check_thread_quit_pipe(int fd, uint32_t events)
171 {
172 if (fd == thread_quit_pipe[0] && (events & LPOLLIN)) {
173 return 1;
174 }
175
176 return 0;
177 }
178
179 /*
180 * Create and init socket from uri.
181 */
182 static
183 struct lttcomm_sock *init_socket(struct lttng_uri *uri)
184 {
185 int ret;
186 struct lttcomm_sock *sock = NULL;
187
188 sock = lttcomm_alloc_sock_from_uri(uri);
189 if (sock == NULL) {
190 ERR("Allocating socket");
191 goto error;
192 }
193
194 ret = lttcomm_create_sock(sock);
195 if (ret < 0) {
196 goto error;
197 }
198 DBG("Listening on sock %d for live", sock->fd);
199
200 ret = sock->ops->bind(sock);
201 if (ret < 0) {
202 goto error;
203 }
204
205 ret = sock->ops->listen(sock, -1);
206 if (ret < 0) {
207 goto error;
208
209 }
210
211 return sock;
212
213 error:
214 if (sock) {
215 lttcomm_destroy_sock(sock);
216 }
217 return NULL;
218 }
219
220 /*
221 * This thread manages the listening for new connections on the network
222 */
223 static
224 void *thread_listener(void *data)
225 {
226 int i, ret, pollfd, err = -1;
227 int val = 1;
228 uint32_t revents, nb_fd;
229 struct lttng_poll_event events;
230 struct lttcomm_sock *live_control_sock;
231
232 DBG("[thread] Relay live listener started");
233
234 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);
235
236 health_code_update();
237
238 live_control_sock = init_socket(live_uri);
239 if (!live_control_sock) {
240 goto error_sock_control;
241 }
242
243 /* Pass 2 as size here for the thread quit pipe and control sockets. */
244 ret = create_thread_poll_set(&events, 2);
245 if (ret < 0) {
246 goto error_create_poll;
247 }
248
249 /* Add the control socket */
250 ret = lttng_poll_add(&events, live_control_sock->fd, LPOLLIN | LPOLLRDHUP);
251 if (ret < 0) {
252 goto error_poll_add;
253 }
254
255 lttng_relay_notify_ready();
256
257 if (testpoint(relayd_thread_live_listener)) {
258 goto error_testpoint;
259 }
260
261 while (1) {
262 health_code_update();
263
264 DBG("Listener accepting live viewers connections");
265
266 restart:
267 health_poll_entry();
268 ret = lttng_poll_wait(&events, -1);
269 health_poll_exit();
270 if (ret < 0) {
271 /*
272 * Restart interrupted system call.
273 */
274 if (errno == EINTR) {
275 goto restart;
276 }
277 goto error;
278 }
279 nb_fd = ret;
280
281 DBG("Relay new viewer connection received");
282 for (i = 0; i < nb_fd; i++) {
283 health_code_update();
284
285 /* Fetch once the poll data */
286 revents = LTTNG_POLL_GETEV(&events, i);
287 pollfd = LTTNG_POLL_GETFD(&events, i);
288
289 /* Thread quit pipe has been closed. Killing thread. */
290 ret = check_thread_quit_pipe(pollfd, revents);
291 if (ret) {
292 err = 0;
293 goto exit;
294 }
295
296 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
297 ERR("socket poll error");
298 goto error;
299 } else if (revents & LPOLLIN) {
300 /*
301 * Get allocated in this thread, enqueued to a global queue,
302 * dequeued and freed in the worker thread.
303 */
304 struct relay_command *relay_cmd;
305 struct lttcomm_sock *newsock;
306
307 relay_cmd = zmalloc(sizeof(*relay_cmd));
308 if (!relay_cmd) {
309 PERROR("relay command zmalloc");
310 goto error;
311 }
312
313 assert(pollfd == live_control_sock->fd);
314 newsock = live_control_sock->ops->accept(live_control_sock);
315 if (!newsock) {
316 PERROR("accepting control sock");
317 free(relay_cmd);
318 goto error;
319 }
320 DBG("Relay viewer connection accepted socket %d", newsock->fd);
321 ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val,
322 sizeof(int));
323 if (ret < 0) {
324 PERROR("setsockopt inet");
325 lttcomm_destroy_sock(newsock);
326 free(relay_cmd);
327 goto error;
328 }
329 relay_cmd->sock = newsock;
330
331 /*
332 * Lock free enqueue the request.
333 */
334 cds_wfq_enqueue(&viewer_cmd_queue.queue, &relay_cmd->node);
335
336 /*
337 * Wake the dispatch queue futex. Implicit memory
338 * barrier with the exchange in cds_wfq_enqueue.
339 */
340 futex_nto1_wake(&viewer_cmd_queue.futex);
341 }
342 }
343 }
344
345 exit:
346 error:
347 error_poll_add:
348 error_testpoint:
349 lttng_poll_clean(&events);
350 error_create_poll:
351 if (live_control_sock->fd >= 0) {
352 ret = live_control_sock->ops->close(live_control_sock);
353 if (ret) {
354 PERROR("close");
355 }
356 }
357 lttcomm_destroy_sock(live_control_sock);
358 error_sock_control:
359 if (err) {
360 health_error();
361 DBG("Live viewer listener thread exited with error");
362 }
363 health_unregister(health_relayd);
364 DBG("Live viewer listener thread cleanup complete");
365 stop_threads();
366 return NULL;
367 }
368
369 /*
370 * This thread manages the dispatching of the requests to worker threads
371 */
372 static
373 void *thread_dispatcher(void *data)
374 {
375 int err = -1;
376 ssize_t ret;
377 struct cds_wfq_node *node;
378 struct relay_command *relay_cmd = NULL;
379
380 DBG("[thread] Live viewer relay dispatcher started");
381
382 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);
383
384 if (testpoint(relayd_thread_live_dispatcher)) {
385 goto error_testpoint;
386 }
387
388 health_code_update();
389
390 while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
391 health_code_update();
392
393 /* Atomically prepare the queue futex */
394 futex_nto1_prepare(&viewer_cmd_queue.futex);
395
396 do {
397 health_code_update();
398
399 /* Dequeue commands */
400 node = cds_wfq_dequeue_blocking(&viewer_cmd_queue.queue);
401 if (node == NULL) {
402 DBG("Woken up but nothing in the live-viewer "
403 "relay command queue");
404 /* Continue thread execution */
405 break;
406 }
407
408 relay_cmd = caa_container_of(node, struct relay_command, node);
409 DBG("Dispatching viewer request waiting on sock %d",
410 relay_cmd->sock->fd);
411
412 /*
413 * Inform worker thread of the new request. This call is blocking
414 * so we can be assured that the data will be read at some point in
415 * time or wait to the end of the world :)
416 */
417 ret = lttng_write(live_relay_cmd_pipe[1], relay_cmd,
418 sizeof(*relay_cmd));
419 free(relay_cmd);
420 if (ret < sizeof(struct relay_command)) {
421 PERROR("write cmd pipe");
422 goto error;
423 }
424 } while (node != NULL);
425
426 /* Futex wait on queue. Blocking call on futex() */
427 health_poll_entry();
428 futex_nto1_wait(&viewer_cmd_queue.futex);
429 health_poll_exit();
430 }
431
432 /* Normal exit, no error */
433 err = 0;
434
435 error:
436 error_testpoint:
437 if (err) {
438 health_error();
439 ERR("Health error occurred in %s", __func__);
440 }
441 health_unregister(health_relayd);
442 DBG("Live viewer dispatch thread dying");
443 stop_threads();
444 return NULL;
445 }
446
447 /*
448 * Establish connection with the viewer and check the versions.
449 *
450 * Return 0 on success or else negative value.
451 */
452 static
453 int viewer_connect(struct relay_command *cmd)
454 {
455 int ret;
456 struct lttng_viewer_connect reply, msg;
457
458 assert(cmd);
459
460 cmd->version_check_done = 1;
461
462 health_code_update();
463
464 /* Get version from the other side. */
465 ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0);
466 if (ret < 0 || ret != sizeof(msg)) {
467 if (ret == 0) {
468 /* Orderly shutdown. Not necessary to print an error. */
469 DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
470 } else {
471 ERR("Relay failed to receive the version values.");
472 }
473 ret = -1;
474 goto end;
475 }
476
477 health_code_update();
478
479 reply.major = RELAYD_VERSION_COMM_MAJOR;
480 reply.minor = RELAYD_VERSION_COMM_MINOR;
481
482 /* Major versions must be the same */
483 if (reply.major != be32toh(msg.major)) {
484 DBG("Incompatible major versions (%u vs %u)", reply.major,
485 be32toh(msg.major));
486 ret = -1;
487 goto end;
488 }
489
490 cmd->major = reply.major;
491 /* We adapt to the lowest compatible version */
492 if (reply.minor <= be32toh(msg.minor)) {
493 cmd->minor = reply.minor;
494 } else {
495 cmd->minor = be32toh(msg.minor);
496 }
497
498 if (be32toh(msg.type) == VIEWER_CLIENT_COMMAND) {
499 cmd->type = RELAY_VIEWER_COMMAND;
500 } else if (be32toh(msg.type) == VIEWER_CLIENT_NOTIFICATION) {
501 cmd->type = RELAY_VIEWER_NOTIFICATION;
502 } else {
503 ERR("Unknown connection type : %u", be32toh(msg.type));
504 ret = -1;
505 goto end;
506 }
507
508 reply.major = htobe32(reply.major);
509 reply.minor = htobe32(reply.minor);
510 if (cmd->type == RELAY_VIEWER_COMMAND) {
511 reply.viewer_session_id = htobe64(++last_relay_viewer_session_id);
512 }
513
514 health_code_update();
515
516 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
517 sizeof(struct lttng_viewer_connect), 0);
518 if (ret < 0) {
519 ERR("Relay sending version");
520 }
521
522 health_code_update();
523
524 DBG("Version check done using protocol %u.%u", cmd->major, cmd->minor);
525 ret = 0;
526
527 end:
528 return ret;
529 }
530
531 /*
532 * Send the viewer the list of current sessions.
533 *
534 * Return 0 on success or else a negative value.
535 */
536 static
537 int viewer_list_sessions(struct relay_command *cmd,
538 struct lttng_ht *sessions_ht)
539 {
540 int ret;
541 struct lttng_viewer_list_sessions session_list;
542 unsigned long count;
543 long approx_before, approx_after;
544 struct lttng_ht_node_ulong *node;
545 struct lttng_ht_iter iter;
546 struct lttng_viewer_session send_session;
547 struct relay_session *session;
548
549 DBG("List sessions received");
550
551 if (cmd->version_check_done == 0) {
552 ERR("Trying to list sessions before version check");
553 ret = -1;
554 goto end_no_session;
555 }
556
557 rcu_read_lock();
558 cds_lfht_count_nodes(sessions_ht->ht, &approx_before, &count, &approx_after);
559 session_list.sessions_count = htobe32(count);
560
561 health_code_update();
562
563 ret = cmd->sock->ops->sendmsg(cmd->sock, &session_list,
564 sizeof(session_list), 0);
565 if (ret < 0) {
566 ERR("Relay sending sessions list");
567 goto end_unlock;
568 }
569
570 health_code_update();
571
572 cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, node, node) {
573 health_code_update();
574
575 node = lttng_ht_iter_get_node_ulong(&iter);
576 if (!node) {
577 goto end_unlock;
578 }
579 session = caa_container_of(node, struct relay_session, session_n);
580
581 strncpy(send_session.session_name, session->session_name,
582 sizeof(send_session.session_name));
583 strncpy(send_session.hostname, session->hostname,
584 sizeof(send_session.hostname));
585 send_session.id = htobe64(session->id);
586 send_session.live_timer = htobe32(session->live_timer);
587 send_session.clients = htobe32(session->viewer_attached);
588 send_session.streams = htobe32(session->stream_count);
589
590 health_code_update();
591
592 ret = cmd->sock->ops->sendmsg(cmd->sock, &send_session,
593 sizeof(send_session), 0);
594 if (ret < 0) {
595 ERR("Relay sending session info");
596 goto end_unlock;
597 }
598 }
599 health_code_update();
600
601 rcu_read_unlock();
602 ret = 0;
603 goto end;
604
605 end_unlock:
606 rcu_read_unlock();
607
608 end:
609 end_no_session:
610 return ret;
611 }
612
613 /*
614 * Open index file using a given viewer stream.
615 *
616 * Return 0 on success or else a negative value.
617 */
618 static int open_index(struct relay_viewer_stream *stream)
619 {
620 int ret;
621 char fullpath[PATH_MAX];
622 struct ctf_packet_index_file_hdr hdr;
623
624 if (stream->tracefile_count > 0) {
625 ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s_%"
626 PRIu64 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
627 stream->channel_name, stream->tracefile_count_current);
628 } else {
629 ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s"
630 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
631 stream->channel_name);
632 }
633 if (ret < 0) {
634 PERROR("snprintf index path");
635 goto error;
636 }
637
638 DBG("Opening index file %s in read only", fullpath);
639 ret = open(fullpath, O_RDONLY);
640 if (ret < 0) {
641 if (errno == ENOENT) {
642 ret = -ENOENT;
643 goto error;
644 } else {
645 PERROR("opening index in read-only");
646 }
647 goto error;
648 }
649 stream->index_read_fd = ret;
650 DBG("Opening index file %s in read only, (fd: %d)", fullpath, ret);
651
652 ret = lttng_read(stream->index_read_fd, &hdr, sizeof(hdr));
653 if (ret < sizeof(hdr)) {
654 PERROR("Reading index header");
655 goto error;
656 }
657 if (be32toh(hdr.magic) != CTF_INDEX_MAGIC) {
658 ERR("Invalid header magic");
659 ret = -1;
660 goto error;
661 }
662 if (be32toh(hdr.index_major) != CTF_INDEX_MAJOR ||
663 be32toh(hdr.index_minor) != CTF_INDEX_MINOR) {
664 ERR("Invalid header version");
665 ret = -1;
666 goto error;
667 }
668 ret = 0;
669
670 error:
671 return ret;
672 }
673
674 /*
675 * Allocate and init a new viewer_stream.
676 *
677 * Copies the values from the stream passed in parameter and insert the new
678 * stream in the viewer_streams_ht.
679 *
680 * MUST be called with rcu_read_lock held.
681 *
682 * Returns 0 on success or a negative value on error.
683 */
684 static
685 int init_viewer_stream(struct relay_stream *stream, int seek_last)
686 {
687 int ret;
688 struct relay_viewer_stream *viewer_stream;
689
690 assert(stream);
691
692 viewer_stream = zmalloc(sizeof(*viewer_stream));
693 if (!viewer_stream) {
694 PERROR("relay viewer stream zmalloc");
695 ret = -1;
696 goto error;
697 }
698 viewer_stream->session_id = stream->session->id;
699 viewer_stream->stream_handle = stream->stream_handle;
700 viewer_stream->path_name = strndup(stream->path_name,
701 LTTNG_VIEWER_PATH_MAX);
702 viewer_stream->channel_name = strndup(stream->channel_name,
703 LTTNG_VIEWER_NAME_MAX);
704 viewer_stream->tracefile_count = stream->tracefile_count;
705 viewer_stream->metadata_flag = stream->metadata_flag;
706 viewer_stream->tracefile_count_last = -1ULL;
707 if (seek_last) {
708 viewer_stream->tracefile_count_current =
709 stream->tracefile_count_current;
710 } else {
711 viewer_stream->tracefile_count_current =
712 stream->oldest_tracefile_id;
713 }
714
715 viewer_stream->ctf_trace = stream->ctf_trace;
716 if (viewer_stream->metadata_flag) {
717 viewer_stream->ctf_trace->viewer_metadata_stream =
718 viewer_stream;
719 }
720 uatomic_inc(&viewer_stream->ctf_trace->refcount);
721
722 lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle);
723 lttng_ht_add_unique_u64(viewer_streams_ht, &viewer_stream->stream_n);
724
725 viewer_stream->index_read_fd = -1;
726 viewer_stream->read_fd = -1;
727
728 /*
729 * This is to avoid a race between the initialization of this object and
730 * the close of the given stream. If the stream is unable to find this
731 * viewer stream when closing, this copy will at least take the latest
732 * value.
733 * We also need that for the seek_last.
734 */
735 viewer_stream->total_index_received = stream->total_index_received;
736
737 /*
738 * If we never received an index for the current stream, delay
739 * the opening of the index, otherwise open it right now.
740 */
741 if (viewer_stream->tracefile_count_current ==
742 stream->tracefile_count_current &&
743 viewer_stream->total_index_received == 0) {
744 viewer_stream->index_read_fd = -1;
745 } else {
746 ret = open_index(viewer_stream);
747 if (ret < 0) {
748 goto error;
749 }
750 }
751
752 if (seek_last && viewer_stream->index_read_fd > 0) {
753 ret = lseek(viewer_stream->index_read_fd,
754 viewer_stream->total_index_received *
755 sizeof(struct ctf_packet_index),
756 SEEK_CUR);
757 if (ret < 0) {
758 goto error;
759 }
760 viewer_stream->last_sent_index =
761 viewer_stream->total_index_received;
762 }
763
764 ret = 0;
765
766 error:
767 return ret;
768 }
769
770 /*
771 * Rotate a stream to the next tracefile.
772 *
773 * Returns 0 on success, 1 on EOF, a negative value on error.
774 */
775 static
776 int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream,
777 struct relay_stream *stream)
778 {
779 int ret;
780 uint64_t tracefile_id;
781
782 assert(viewer_stream);
783
784 tracefile_id = (viewer_stream->tracefile_count_current + 1) %
785 viewer_stream->tracefile_count;
786 /*
787 * Detect the last tracefile to open.
788 */
789 if (viewer_stream->tracefile_count_last != -1ULL &&
790 viewer_stream->tracefile_count_last ==
791 viewer_stream->tracefile_count_current) {
792 ret = 1;
793 goto end;
794 }
795
796 if (stream) {
797 pthread_mutex_lock(&stream->viewer_stream_rotation_lock);
798 }
799 /*
800 * The writer and the reader are not working in the same
801 * tracefile, we can read up to EOF, we don't care about the
802 * total_index_received.
803 */
804 if (!stream || (stream->tracefile_count_current != tracefile_id)) {
805 viewer_stream->close_write_flag = 1;
806 } else {
807 /*
808 * We are opening a file that is still open in write, make
809 * sure we limit our reading to the number of indexes
810 * received.
811 */
812 viewer_stream->close_write_flag = 0;
813 if (stream) {
814 viewer_stream->total_index_received =
815 stream->total_index_received;
816 }
817 }
818 viewer_stream->tracefile_count_current = tracefile_id;
819
820 ret = close(viewer_stream->index_read_fd);
821 if (ret < 0) {
822 PERROR("close index file %d",
823 viewer_stream->index_read_fd);
824 }
825 viewer_stream->index_read_fd = -1;
826 ret = close(viewer_stream->read_fd);
827 if (ret < 0) {
828 PERROR("close tracefile %d",
829 viewer_stream->read_fd);
830 }
831 viewer_stream->read_fd = -1;
832
833 pthread_mutex_lock(&viewer_stream->overwrite_lock);
834 viewer_stream->abort_flag = 0;
835 pthread_mutex_unlock(&viewer_stream->overwrite_lock);
836
837 viewer_stream->index_read_fd = -1;
838 viewer_stream->read_fd = -1;
839
840 if (stream) {
841 pthread_mutex_unlock(&stream->viewer_stream_rotation_lock);
842 }
843 ret = open_index(viewer_stream);
844 if (ret < 0) {
845 goto error;
846 }
847
848 ret = 0;
849
850 end:
851 error:
852 return ret;
853 }
854
855 /*
856 * Send the viewer the list of current sessions.
857 */
858 static
859 int viewer_attach_session(struct relay_command *cmd,
860 struct lttng_ht *sessions_ht)
861 {
862 int ret, send_streams = 0;
863 uint32_t nb_streams = 0, nb_streams_ready = 0;
864 struct lttng_viewer_attach_session_request request;
865 struct lttng_viewer_attach_session_response response;
866 struct lttng_viewer_stream send_stream;
867 struct relay_stream *stream;
868 struct relay_viewer_stream *viewer_stream;
869 struct lttng_ht_node_ulong *node;
870 struct lttng_ht_node_u64 *node64;
871 struct lttng_ht_iter iter;
872 struct relay_session *session;
873 int seek_last = 0;
874
875 assert(cmd);
876 assert(sessions_ht);
877
878 DBG("Attach session received");
879
880 if (cmd->version_check_done == 0) {
881 ERR("Trying to attach session before version check");
882 ret = -1;
883 goto end_no_session;
884 }
885
886 health_code_update();
887
888 ret = cmd->sock->ops->recvmsg(cmd->sock, &request, sizeof(request), 0);
889 if (ret < 0 || ret != sizeof(request)) {
890 if (ret == 0) {
891 /* Orderly shutdown. Not necessary to print an error. */
892 DBG("Socket %d did an orderly shutdown", cmd->sock->fd);
893 } else {
894 ERR("Relay failed to receive the attach parameters.");
895 }
896 ret = -1;
897 goto error;
898 }
899
900 health_code_update();
901
902 rcu_read_lock();
903 lttng_ht_lookup(sessions_ht,
904 (void *)((unsigned long) be64toh(request.session_id)), &iter);
905 node = lttng_ht_iter_get_node_ulong(&iter);
906 if (node == NULL) {
907 DBG("Relay session %" PRIu64 " not found",
908 be64toh(request.session_id));
909 response.status = htobe32(VIEWER_ATTACH_UNK);
910 goto send_reply;
911 }
912
913 session = caa_container_of(node, struct relay_session, session_n);
914 if (cmd->session_id == session->id) {
915 /* Same viewer already attached, just send the stream list. */
916 send_streams = 1;
917 response.status = htobe32(VIEWER_ATTACH_OK);
918 } else if (session->viewer_attached != 0) {
919 DBG("Already a viewer attached");
920 response.status = htobe32(VIEWER_ATTACH_ALREADY);
921 goto send_reply;
922 } else if (session->live_timer == 0) {
923 DBG("Not live session");
924 response.status = htobe32(VIEWER_ATTACH_NOT_LIVE);
925 goto send_reply;
926 } else {
927 session->viewer_attached++;
928 send_streams = 1;
929 response.status = htobe32(VIEWER_ATTACH_OK);
930 cmd->session_id = session->id;
931 cmd->session = session;
932 }
933
934 switch (be32toh(request.seek)) {
935 case VIEWER_SEEK_BEGINNING:
936 /* Default behaviour. */
937 break;
938 case VIEWER_SEEK_LAST:
939 seek_last = 1;
940 break;
941 default:
942 ERR("Wrong seek parameter");
943 response.status = htobe32(VIEWER_ATTACH_SEEK_ERR);
944 send_streams = 0;
945 goto send_reply;
946 }
947
948 if (send_streams) {
949 /* We should only be there if we have a session to attach to. */
950 assert(session);
951
952 /*
953 * Fill the viewer_streams_ht to count the number of streams
954 * ready to be sent and avoid concurrency issues on the
955 * relay_streams_ht and don't rely on a total session stream count.
956 */
957 cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, node, node) {
958 struct relay_viewer_stream *vstream;
959
960 health_code_update();
961
962 node = lttng_ht_iter_get_node_ulong(&iter);
963 if (!node) {
964 continue;
965 }
966 stream = caa_container_of(node, struct relay_stream, stream_n);
967 if (stream->session != cmd->session) {
968 continue;
969 }
970 nb_streams++;
971
972 /*
973 * Don't send streams with no ctf_trace, they are not
974 * ready to be read.
975 */
976 if (!stream->ctf_trace || !stream->viewer_ready) {
977 continue;
978 }
979 nb_streams_ready++;
980
981 vstream = live_find_viewer_stream_by_id(stream->stream_handle);
982 if (!vstream) {
983 ret = init_viewer_stream(stream, seek_last);
984 if (ret < 0) {
985 goto end_unlock;
986 }
987 }
988 }
989
990 /* We must have the same amount of existing stream and ready stream. */
991 if (nb_streams != nb_streams_ready) {
992 nb_streams = 0;
993 }
994 response.streams_count = htobe32(nb_streams);
995 }
996
997 send_reply:
998 health_code_update();
999 ret = cmd->sock->ops->sendmsg(cmd->sock, &response, sizeof(response), 0);
1000 if (ret < 0) {
1001 ERR("Relay sending viewer attach response");
1002 goto end_unlock;
1003 }
1004 health_code_update();
1005
1006 /*
1007 * Unknown or empty session, just return gracefully, the viewer knows what
1008 * is happening.
1009 */
1010 if (!send_streams || !nb_streams) {
1011 ret = 0;
1012 goto end_unlock;
1013 }
1014
1015 /* We should only be there if we have a session to attach to. */
1016 assert(session);
1017 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
1018 health_code_update();
1019
1020 node64 = lttng_ht_iter_get_node_u64(&iter);
1021 if (!node64) {
1022 continue;
1023 }
1024 viewer_stream = caa_container_of(node64, struct relay_viewer_stream,
1025 stream_n);
1026 if (viewer_stream->session_id != cmd->session->id) {
1027 continue;
1028 }
1029
1030 send_stream.id = htobe64(viewer_stream->stream_handle);
1031 send_stream.ctf_trace_id = htobe64(viewer_stream->ctf_trace->id);
1032 send_stream.metadata_flag = htobe32(viewer_stream->metadata_flag);
1033 strncpy(send_stream.path_name, viewer_stream->path_name,
1034 sizeof(send_stream.path_name));
1035 strncpy(send_stream.channel_name, viewer_stream->channel_name,
1036 sizeof(send_stream.channel_name));
1037
1038 ret = cmd->sock->ops->sendmsg(cmd->sock, &send_stream,
1039 sizeof(send_stream), 0);
1040 if (ret < 0) {
1041 ERR("Relay sending stream %" PRIu64, viewer_stream->stream_handle);
1042 goto end_unlock;
1043 }
1044 DBG("Sent stream %" PRIu64 " to viewer", viewer_stream->stream_handle);
1045 }
1046 ret = 0;
1047
1048 end_unlock:
1049 rcu_read_unlock();
1050 end_no_session:
1051 error:
1052 return ret;
1053 }
1054
1055 /*
1056 * Get viewer stream from stream id.
1057 *
1058 * RCU read side lock MUST be acquired.
1059 */
1060 struct relay_viewer_stream *live_find_viewer_stream_by_id(uint64_t stream_id)
1061 {
1062 struct lttng_ht_node_u64 *node;
1063 struct lttng_ht_iter iter;
1064 struct relay_viewer_stream *stream = NULL;
1065
1066 lttng_ht_lookup(viewer_streams_ht, &stream_id, &iter);
1067 node = lttng_ht_iter_get_node_u64(&iter);
1068 if (node == NULL) {
1069 DBG("Relay viewer stream %" PRIu64 " not found", stream_id);
1070 goto end;
1071 }
1072 stream = caa_container_of(node, struct relay_viewer_stream, stream_n);
1073
1074 end:
1075 return stream;
1076 }
1077
1078 static
1079 void deferred_free_viewer_stream(struct rcu_head *head)
1080 {
1081 struct relay_viewer_stream *stream =
1082 caa_container_of(head, struct relay_viewer_stream, rcu_node);
1083
1084 free(stream->path_name);
1085 free(stream->channel_name);
1086 free(stream);
1087 }
1088
1089 static
1090 void delete_viewer_stream(struct relay_viewer_stream *vstream)
1091 {
1092 int delret;
1093 struct lttng_ht_iter iter;
1094
1095 iter.iter.node = &vstream->stream_n.node;
1096 delret = lttng_ht_del(viewer_streams_ht, &iter);
1097 assert(!delret);
1098 }
1099
1100 static
1101 void destroy_viewer_stream(struct relay_viewer_stream *vstream)
1102 {
1103 unsigned long ret_ref;
1104 int ret;
1105
1106 assert(vstream);
1107 ret_ref = uatomic_add_return(&vstream->ctf_trace->refcount, -1);
1108 assert(ret_ref >= 0);
1109
1110 if (vstream->read_fd >= 0) {
1111 ret = close(vstream->read_fd);
1112 if (ret < 0) {
1113 PERROR("close read_fd");
1114 }
1115 }
1116 if (vstream->index_read_fd >= 0) {
1117 ret = close(vstream->index_read_fd);
1118 if (ret < 0) {
1119 PERROR("close index_read_fd");
1120 }
1121 }
1122
1123 /*
1124 * If the only stream left in the HT is the metadata stream,
1125 * we need to remove it because we won't detect a EOF for this
1126 * stream.
1127 */
1128 if (ret_ref == 1 && vstream->ctf_trace->viewer_metadata_stream) {
1129 delete_viewer_stream(vstream->ctf_trace->viewer_metadata_stream);
1130 destroy_viewer_stream(vstream->ctf_trace->viewer_metadata_stream);
1131 vstream->ctf_trace->metadata_stream = NULL;
1132 DBG("Freeing ctf_trace %" PRIu64, vstream->ctf_trace->id);
1133 /*
1134 * The streaming-side is already closed and we can't receive a new
1135 * stream concurrently at this point (since the session is being
1136 * destroyed), so when we detect the refcount equals 0, we are the
1137 * only owners of the ctf_trace and we can free it ourself.
1138 */
1139 free(vstream->ctf_trace);
1140 }
1141
1142 call_rcu(&vstream->rcu_node, deferred_free_viewer_stream);
1143 }
1144
1145 /*
1146 * Send the next index for a stream.
1147 *
1148 * Return 0 on success or else a negative value.
1149 */
1150 static
1151 int viewer_get_next_index(struct relay_command *cmd,
1152 struct lttng_ht *sessions_ht)
1153 {
1154 int ret;
1155 struct lttng_viewer_get_next_index request_index;
1156 struct lttng_viewer_index viewer_index;
1157 struct ctf_packet_index packet_index;
1158 struct relay_viewer_stream *vstream;
1159 struct relay_stream *rstream;
1160
1161 assert(cmd);
1162 assert(sessions_ht);
1163
1164 DBG("Viewer get next index");
1165
1166 if (cmd->version_check_done == 0) {
1167 ERR("Trying to request index before version check");
1168 ret = -1;
1169 goto end_no_session;
1170 }
1171
1172 health_code_update();
1173 ret = cmd->sock->ops->recvmsg(cmd->sock, &request_index,
1174 sizeof(request_index), 0);
1175 if (ret < 0 || ret != sizeof(request_index)) {
1176 ret = -1;
1177 ERR("Relay didn't receive the whole packet");
1178 goto end;
1179 }
1180 health_code_update();
1181
1182 rcu_read_lock();
1183 vstream = live_find_viewer_stream_by_id(be64toh(request_index.stream_id));
1184 if (!vstream) {
1185 ret = -1;
1186 goto end_unlock;
1187 }
1188
1189 memset(&viewer_index, 0, sizeof(viewer_index));
1190
1191 /*
1192 * The viewer should not ask for index on metadata stream.
1193 */
1194 if (vstream->metadata_flag) {
1195 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1196 goto send_reply;
1197 }
1198
1199 /* First time, we open the index file */
1200 if (vstream->index_read_fd < 0) {
1201 ret = open_index(vstream);
1202 if (ret == -ENOENT) {
1203 /*
1204 * The index is created only when the first data packet arrives, it
1205 * might not be ready at the beginning of the session
1206 */
1207 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1208 goto send_reply;
1209 } else if (ret < 0) {
1210 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1211 goto send_reply;
1212 }
1213 }
1214
1215 rstream = relay_stream_find_by_id(vstream->stream_handle);
1216 if (rstream) {
1217 if (vstream->abort_flag) {
1218 /* Rotate on abort (overwrite). */
1219 DBG("Viewer rotate because of overwrite");
1220 ret = rotate_viewer_stream(vstream, rstream);
1221 if (ret < 0) {
1222 goto end_unlock;
1223 } else if (ret == 1) {
1224 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1225 delete_viewer_stream(vstream);
1226 destroy_viewer_stream(vstream);
1227 goto send_reply;
1228 }
1229 }
1230 pthread_mutex_lock(&rstream->viewer_stream_rotation_lock);
1231 if (rstream->tracefile_count_current == vstream->tracefile_count_current) {
1232 if (rstream->beacon_ts_end != -1ULL &&
1233 vstream->last_sent_index == rstream->total_index_received) {
1234 viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE);
1235 viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end);
1236 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1237 goto send_reply;
1238 /*
1239 * Reader and writer are working in the same tracefile, so we care
1240 * about the number of index received and sent. Otherwise, we read
1241 * up to EOF.
1242 */
1243 } else if (rstream->total_index_received <= vstream->last_sent_index
1244 && !vstream->close_write_flag) {
1245 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1246 /* No new index to send, retry later. */
1247 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1248 goto send_reply;
1249 }
1250 }
1251 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1252 } else if (!rstream && vstream->close_write_flag &&
1253 vstream->total_index_received == vstream->last_sent_index) {
1254 /* Last index sent and current tracefile closed in write */
1255 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1256 delete_viewer_stream(vstream);
1257 destroy_viewer_stream(vstream);
1258 goto send_reply;
1259 } else {
1260 vstream->close_write_flag = 1;
1261 }
1262
1263 if (!vstream->ctf_trace->metadata_received ||
1264 vstream->ctf_trace->metadata_received >
1265 vstream->ctf_trace->metadata_sent) {
1266 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1267 }
1268
1269 pthread_mutex_lock(&vstream->overwrite_lock);
1270 if (vstream->abort_flag) {
1271 /*
1272 * The file is being overwritten by the writer, we cannot
1273 * use it.
1274 */
1275 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1276 pthread_mutex_unlock(&vstream->overwrite_lock);
1277 ret = rotate_viewer_stream(vstream, rstream);
1278 if (ret < 0) {
1279 goto end_unlock;
1280 } else if (ret == 1) {
1281 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1282 delete_viewer_stream(vstream);
1283 destroy_viewer_stream(vstream);
1284 goto send_reply;
1285 }
1286 goto send_reply;
1287 }
1288 ret = lttng_read(vstream->index_read_fd, &packet_index,
1289 sizeof(packet_index));
1290 pthread_mutex_unlock(&vstream->overwrite_lock);
1291 if (ret < sizeof(packet_index)) {
1292 /*
1293 * The tracefile is closed in write, so we read up to EOF.
1294 */
1295 if (vstream->close_write_flag == 1) {
1296 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1297 /* Rotate on normal EOF */
1298 ret = rotate_viewer_stream(vstream, rstream);
1299 if (ret < 0) {
1300 goto end_unlock;
1301 } else if (ret == 1) {
1302 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1303 delete_viewer_stream(vstream);
1304 destroy_viewer_stream(vstream);
1305 goto send_reply;
1306 }
1307 } else {
1308 PERROR("Relay reading index file %d",
1309 vstream->index_read_fd);
1310 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1311 }
1312 goto send_reply;
1313 } else {
1314 viewer_index.status = htobe32(VIEWER_INDEX_OK);
1315 vstream->last_sent_index++;
1316 }
1317
1318 /*
1319 * Indexes are stored in big endian, no need to switch before sending.
1320 */
1321 viewer_index.offset = packet_index.offset;
1322 viewer_index.packet_size = packet_index.packet_size;
1323 viewer_index.content_size = packet_index.content_size;
1324 viewer_index.timestamp_begin = packet_index.timestamp_begin;
1325 viewer_index.timestamp_end = packet_index.timestamp_end;
1326 viewer_index.events_discarded = packet_index.events_discarded;
1327 viewer_index.stream_id = packet_index.stream_id;
1328
1329 send_reply:
1330 viewer_index.flags = htobe32(viewer_index.flags);
1331 health_code_update();
1332 ret = cmd->sock->ops->sendmsg(cmd->sock, &viewer_index,
1333 sizeof(viewer_index), 0);
1334 if (ret < 0) {
1335 ERR("Relay index to viewer");
1336 goto end_unlock;
1337 }
1338 health_code_update();
1339
1340 DBG("Index %" PRIu64 "for stream %" PRIu64 "sent",
1341 vstream->last_sent_index, vstream->stream_handle);
1342
1343 end_unlock:
1344 rcu_read_unlock();
1345
1346 end_no_session:
1347 end:
1348 return ret;
1349 }
1350
1351 /*
1352 * Send the next index for a stream
1353 *
1354 * Return 0 on success or else a negative value.
1355 */
1356 static
1357 int viewer_get_packet(struct relay_command *cmd)
1358 {
1359 int ret, send_data = 0;
1360 char *data = NULL;
1361 uint32_t len = 0;
1362 ssize_t read_len;
1363 struct lttng_viewer_get_packet get_packet_info;
1364 struct lttng_viewer_trace_packet reply;
1365 struct relay_viewer_stream *stream;
1366
1367 assert(cmd);
1368
1369 DBG2("Relay get data packet");
1370
1371 if (cmd->version_check_done == 0) {
1372 ERR("Trying to get packet before version check");
1373 ret = -1;
1374 goto end;
1375 }
1376
1377 health_code_update();
1378 ret = cmd->sock->ops->recvmsg(cmd->sock, &get_packet_info,
1379 sizeof(get_packet_info), 0);
1380 if (ret < 0 || ret != sizeof(get_packet_info)) {
1381 ret = -1;
1382 ERR("Relay didn't receive the whole packet");
1383 goto end;
1384 }
1385 health_code_update();
1386
1387 /* From this point on, the error label can be reached. */
1388 memset(&reply, 0, sizeof(reply));
1389
1390 rcu_read_lock();
1391 stream = live_find_viewer_stream_by_id(be64toh(get_packet_info.stream_id));
1392 if (!stream) {
1393 goto error;
1394 }
1395 assert(stream->ctf_trace);
1396
1397 /*
1398 * First time we read this stream, we need open the tracefile, we should
1399 * only arrive here if an index has already been sent to the viewer, so the
1400 * tracefile must exist, if it does not it is a fatal error.
1401 */
1402 if (stream->read_fd < 0) {
1403 char fullpath[PATH_MAX];
1404
1405 if (stream->tracefile_count > 0) {
1406 ret = snprintf(fullpath, PATH_MAX, "%s/%s_%" PRIu64, stream->path_name,
1407 stream->channel_name,
1408 stream->tracefile_count_current);
1409 } else {
1410 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1411 stream->channel_name);
1412 }
1413 if (ret < 0) {
1414 goto error;
1415 }
1416 ret = open(fullpath, O_RDONLY);
1417 if (ret < 0) {
1418 PERROR("Relay opening trace file");
1419 goto error;
1420 }
1421 stream->read_fd = ret;
1422 }
1423
1424 if (!stream->ctf_trace->metadata_received ||
1425 stream->ctf_trace->metadata_received >
1426 stream->ctf_trace->metadata_sent) {
1427 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1428 reply.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1429 goto send_reply;
1430 }
1431
1432 len = be32toh(get_packet_info.len);
1433 data = zmalloc(len);
1434 if (!data) {
1435 PERROR("relay data zmalloc");
1436 goto error;
1437 }
1438
1439 ret = lseek(stream->read_fd, be64toh(get_packet_info.offset), SEEK_SET);
1440 if (ret < 0) {
1441 /*
1442 * If the read fd was closed by the streaming side, the
1443 * abort_flag will be set to 1, otherwise it is an error.
1444 */
1445 if (stream->abort_flag == 0) {
1446 PERROR("lseek");
1447 goto error;
1448 }
1449 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1450 goto send_reply;
1451 }
1452 read_len = lttng_read(stream->read_fd, data, len);
1453 if (read_len < len) {
1454 /*
1455 * If the read fd was closed by the streaming side, the
1456 * abort_flag will be set to 1, otherwise it is an error.
1457 */
1458 if (stream->abort_flag == 0) {
1459 PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64,
1460 stream->read_fd,
1461 be64toh(get_packet_info.offset));
1462 goto error;
1463 } else {
1464 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1465 goto send_reply;
1466 }
1467 }
1468 reply.status = htobe32(VIEWER_GET_PACKET_OK);
1469 reply.len = htobe32(len);
1470 send_data = 1;
1471 goto send_reply;
1472
1473 error:
1474 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1475
1476 send_reply:
1477 reply.flags = htobe32(reply.flags);
1478
1479 health_code_update();
1480 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
1481 if (ret < 0) {
1482 ERR("Relay data header to viewer");
1483 goto end_unlock;
1484 }
1485 health_code_update();
1486
1487 if (send_data) {
1488 health_code_update();
1489 ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
1490 if (ret < 0) {
1491 ERR("Relay send data to viewer");
1492 goto end_unlock;
1493 }
1494 health_code_update();
1495 }
1496
1497 DBG("Sent %u bytes for stream %" PRIu64, len,
1498 be64toh(get_packet_info.stream_id));
1499
1500 end_unlock:
1501 free(data);
1502 rcu_read_unlock();
1503
1504 end:
1505 return ret;
1506 }
1507
1508 /*
1509 * Send the session's metadata
1510 *
1511 * Return 0 on success else a negative value.
1512 */
1513 static
1514 int viewer_get_metadata(struct relay_command *cmd)
1515 {
1516 int ret = 0;
1517 ssize_t read_len;
1518 uint64_t len = 0;
1519 char *data = NULL;
1520 struct lttng_viewer_get_metadata request;
1521 struct lttng_viewer_metadata_packet reply;
1522 struct relay_viewer_stream *stream;
1523
1524 assert(cmd);
1525
1526 DBG("Relay get metadata");
1527
1528 if (cmd->version_check_done == 0) {
1529 ERR("Trying to get metadata before version check");
1530 ret = -1;
1531 goto end;
1532 }
1533
1534 health_code_update();
1535 ret = cmd->sock->ops->recvmsg(cmd->sock, &request,
1536 sizeof(request), 0);
1537 if (ret < 0 || ret != sizeof(request)) {
1538 ret = -1;
1539 ERR("Relay didn't receive the whole packet");
1540 goto end;
1541 }
1542 health_code_update();
1543
1544 rcu_read_lock();
1545 stream = live_find_viewer_stream_by_id(be64toh(request.stream_id));
1546 if (!stream || !stream->metadata_flag) {
1547 ERR("Invalid metadata stream");
1548 goto error;
1549 }
1550 assert(stream->ctf_trace);
1551 assert(stream->ctf_trace->metadata_sent <=
1552 stream->ctf_trace->metadata_received);
1553
1554 len = stream->ctf_trace->metadata_received -
1555 stream->ctf_trace->metadata_sent;
1556 if (len == 0) {
1557 reply.status = htobe32(VIEWER_NO_NEW_METADATA);
1558 goto send_reply;
1559 }
1560
1561 /* first time, we open the metadata file */
1562 if (stream->read_fd < 0) {
1563 char fullpath[PATH_MAX];
1564
1565 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1566 stream->channel_name);
1567 if (ret < 0) {
1568 goto error;
1569 }
1570 ret = open(fullpath, O_RDONLY);
1571 if (ret < 0) {
1572 PERROR("Relay opening metadata file");
1573 goto error;
1574 }
1575 stream->read_fd = ret;
1576 }
1577
1578 reply.len = htobe64(len);
1579 data = zmalloc(len);
1580 if (!data) {
1581 PERROR("viewer metadata zmalloc");
1582 goto error;
1583 }
1584
1585 read_len = lttng_read(stream->read_fd, data, len);
1586 if (read_len < len) {
1587 PERROR("Relay reading metadata file");
1588 goto error;
1589 }
1590 stream->ctf_trace->metadata_sent += read_len;
1591 reply.status = htobe32(VIEWER_METADATA_OK);
1592 goto send_reply;
1593
1594 error:
1595 reply.status = htobe32(VIEWER_METADATA_ERR);
1596
1597 send_reply:
1598 health_code_update();
1599 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
1600 if (ret < 0) {
1601 ERR("Relay data header to viewer");
1602 goto end_unlock;
1603 }
1604 health_code_update();
1605
1606 if (len > 0) {
1607 ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
1608 if (ret < 0) {
1609 ERR("Relay send data to viewer");
1610 goto end_unlock;
1611 }
1612 }
1613
1614 DBG("Sent %" PRIu64 " bytes of metadata for stream %" PRIu64, len,
1615 be64toh(request.stream_id));
1616
1617 DBG("Metadata sent");
1618
1619 end_unlock:
1620 free(data);
1621 rcu_read_unlock();
1622 end:
1623 return ret;
1624 }
1625
1626 /*
1627 * live_relay_unknown_command: send -1 if received unknown command
1628 */
1629 static
1630 void live_relay_unknown_command(struct relay_command *cmd)
1631 {
1632 struct lttcomm_relayd_generic_reply reply;
1633 int ret;
1634
1635 reply.ret_code = htobe32(LTTNG_ERR_UNK);
1636 ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
1637 sizeof(struct lttcomm_relayd_generic_reply), 0);
1638 if (ret < 0) {
1639 ERR("Relay sending unknown command");
1640 }
1641 }
1642
1643 /*
1644 * Process the commands received on the control socket
1645 */
1646 static
1647 int process_control(struct lttng_viewer_cmd *recv_hdr,
1648 struct relay_command *cmd, struct lttng_ht *sessions_ht)
1649 {
1650 int ret = 0;
1651
1652 switch (be32toh(recv_hdr->cmd)) {
1653 case VIEWER_CONNECT:
1654 ret = viewer_connect(cmd);
1655 break;
1656 case VIEWER_LIST_SESSIONS:
1657 ret = viewer_list_sessions(cmd, sessions_ht);
1658 break;
1659 case VIEWER_ATTACH_SESSION:
1660 ret = viewer_attach_session(cmd, sessions_ht);
1661 break;
1662 case VIEWER_GET_NEXT_INDEX:
1663 ret = viewer_get_next_index(cmd, sessions_ht);
1664 break;
1665 case VIEWER_GET_PACKET:
1666 ret = viewer_get_packet(cmd);
1667 break;
1668 case VIEWER_GET_METADATA:
1669 ret = viewer_get_metadata(cmd);
1670 break;
1671 default:
1672 ERR("Received unknown viewer command (%u)", be32toh(recv_hdr->cmd));
1673 live_relay_unknown_command(cmd);
1674 ret = -1;
1675 goto end;
1676 }
1677
1678 end:
1679 return ret;
1680 }
1681
/*
 * Remove a file descriptor from the poll set, then close it. A close failure
 * is only logged.
 */
static
void cleanup_poll_connection(struct lttng_poll_event *events, int pollfd)
{
	assert(events);

	lttng_poll_del(events, pollfd);

	if (close(pollfd) < 0) {
		ERR("Closing pollfd %d", pollfd);
	}
}
1696
1697 /*
1698 * Create and add connection to the given hash table.
1699 *
1700 * Return poll add value or else -1 on error.
1701 */
1702 static
1703 int add_connection(int fd, struct lttng_poll_event *events,
1704 struct lttng_ht *relay_connections_ht)
1705 {
1706 int ret;
1707 struct relay_command *relay_connection;
1708
1709 assert(events);
1710 assert(relay_connections_ht);
1711
1712 relay_connection = zmalloc(sizeof(struct relay_command));
1713 if (relay_connection == NULL) {
1714 PERROR("Relay command zmalloc");
1715 goto error;
1716 }
1717
1718 ret = lttng_read(fd, relay_connection, sizeof(*relay_connection));
1719 if (ret < sizeof(*relay_connection)) {
1720 PERROR("read relay cmd pipe");
1721 goto error_read;
1722 }
1723
1724 lttng_ht_node_init_ulong(&relay_connection->sock_n,
1725 (unsigned long) relay_connection->sock->fd);
1726 rcu_read_lock();
1727 lttng_ht_add_unique_ulong(relay_connections_ht,
1728 &relay_connection->sock_n);
1729 rcu_read_unlock();
1730
1731 return lttng_poll_add(events, relay_connection->sock->fd,
1732 LPOLLIN | LPOLLRDHUP);
1733
1734 error_read:
1735 free(relay_connection);
1736 error:
1737 return -1;
1738 }
1739
1740 static
1741 void deferred_free_connection(struct rcu_head *head)
1742 {
1743 struct relay_command *relay_connection =
1744 caa_container_of(head, struct relay_command, rcu_node);
1745
1746 if (relay_connection->session &&
1747 relay_connection->session->viewer_attached > 0) {
1748 relay_connection->session->viewer_attached--;
1749 }
1750 lttcomm_destroy_sock(relay_connection->sock);
1751 free(relay_connection);
1752 }
1753
1754 /*
1755 * Delete all streams for a specific session ID.
1756 */
1757 static
1758 void viewer_del_streams(uint64_t session_id)
1759 {
1760 struct relay_viewer_stream *stream;
1761 struct lttng_ht_iter iter;
1762
1763 rcu_read_lock();
1764 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, stream,
1765 stream_n.node) {
1766 health_code_update();
1767
1768 if (stream->session_id != session_id) {
1769 continue;
1770 }
1771
1772 delete_viewer_stream(stream);
1773 assert(stream->ctf_trace);
1774
1775 if (stream->metadata_flag) {
1776 /*
1777 * The metadata viewer stream is destroyed once the refcount on the
1778 * ctf trace goes to 0 in the destroy stream function thus there is
1779 * no explicit call to that function here.
1780 */
1781 stream->ctf_trace->metadata_sent = 0;
1782 stream->ctf_trace->viewer_metadata_stream = NULL;
1783 } else {
1784 destroy_viewer_stream(stream);
1785 }
1786 }
1787 rcu_read_unlock();
1788 }
1789
1790 /*
1791 * Delete and free a connection.
1792 *
1793 * RCU read side lock MUST be acquired.
1794 */
1795 static
1796 void del_connection(struct lttng_ht *relay_connections_ht,
1797 struct lttng_ht_iter *iter, struct relay_command *relay_connection)
1798 {
1799 int ret;
1800
1801 assert(relay_connections_ht);
1802 assert(iter);
1803 assert(relay_connection);
1804
1805 DBG("Cleaning connection of session ID %" PRIu64,
1806 relay_connection->session_id);
1807
1808 ret = lttng_ht_del(relay_connections_ht, iter);
1809 assert(!ret);
1810
1811 viewer_del_streams(relay_connection->session_id);
1812
1813 call_rcu(&relay_connection->rcu_node, deferred_free_connection);
1814 }
1815
1816 /*
1817 * This thread does the actual work
1818 */
1819 static
1820 void *thread_worker(void *data)
1821 {
1822 int ret, err = -1;
1823 uint32_t nb_fd;
1824 struct relay_command *relay_connection;
1825 struct lttng_poll_event events;
1826 struct lttng_ht *relay_connections_ht;
1827 struct lttng_ht_node_ulong *node;
1828 struct lttng_ht_iter iter;
1829 struct lttng_viewer_cmd recv_hdr;
1830 struct relay_local_data *relay_ctx = (struct relay_local_data *) data;
1831 struct lttng_ht *sessions_ht = relay_ctx->sessions_ht;
1832
1833 DBG("[thread] Live viewer relay worker started");
1834
1835 rcu_register_thread();
1836
1837 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);
1838
1839 if (testpoint(relayd_thread_live_worker)) {
1840 goto error_testpoint;
1841 }
1842
1843 /* table of connections indexed on socket */
1844 relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
1845 if (!relay_connections_ht) {
1846 goto relay_connections_ht_error;
1847 }
1848
1849 ret = create_thread_poll_set(&events, 2);
1850 if (ret < 0) {
1851 goto error_poll_create;
1852 }
1853
1854 ret = lttng_poll_add(&events, live_relay_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1855 if (ret < 0) {
1856 goto error;
1857 }
1858
1859 restart:
1860 while (1) {
1861 int i;
1862
1863 health_code_update();
1864
1865 /* Infinite blocking call, waiting for transmission */
1866 DBG3("Relayd live viewer worker thread polling...");
1867 health_poll_entry();
1868 ret = lttng_poll_wait(&events, -1);
1869 health_poll_exit();
1870 if (ret < 0) {
1871 /*
1872 * Restart interrupted system call.
1873 */
1874 if (errno == EINTR) {
1875 goto restart;
1876 }
1877 goto error;
1878 }
1879
1880 nb_fd = ret;
1881
1882 /*
1883 * Process control. The control connection is prioritised so we don't
1884 * starve it with high throughput tracing data on the data
1885 * connection.
1886 */
1887 for (i = 0; i < nb_fd; i++) {
1888 /* Fetch once the poll data */
1889 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1890 int pollfd = LTTNG_POLL_GETFD(&events, i);
1891
1892 health_code_update();
1893
1894 /* Thread quit pipe has been closed. Killing thread. */
1895 ret = check_thread_quit_pipe(pollfd, revents);
1896 if (ret) {
1897 err = 0;
1898 goto exit;
1899 }
1900
1901 /* Inspect the relay cmd pipe for new connection */
1902 if (pollfd == live_relay_cmd_pipe[0]) {
1903 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1904 ERR("Relay live pipe error");
1905 goto error;
1906 } else if (revents & LPOLLIN) {
1907 DBG("Relay live viewer command received");
1908 ret = add_connection(live_relay_cmd_pipe[0],
1909 &events, relay_connections_ht);
1910 if (ret < 0) {
1911 goto error;
1912 }
1913 }
1914 } else if (revents) {
1915 rcu_read_lock();
1916 lttng_ht_lookup(relay_connections_ht,
1917 (void *)((unsigned long) pollfd), &iter);
1918 node = lttng_ht_iter_get_node_ulong(&iter);
1919 if (node == NULL) {
1920 DBG2("Relay viewer sock %d not found", pollfd);
1921 rcu_read_unlock();
1922 goto error;
1923 }
1924 relay_connection = caa_container_of(node, struct relay_command,
1925 sock_n);
1926
1927 if (revents & (LPOLLERR)) {
1928 cleanup_poll_connection(&events, pollfd);
1929 del_connection(relay_connections_ht, &iter,
1930 relay_connection);
1931 } else if (revents & (LPOLLHUP | LPOLLRDHUP)) {
1932 DBG("Viewer socket %d hung up", pollfd);
1933 cleanup_poll_connection(&events, pollfd);
1934 del_connection(relay_connections_ht, &iter,
1935 relay_connection);
1936 } else if (revents & LPOLLIN) {
1937 ret = relay_connection->sock->ops->recvmsg(
1938 relay_connection->sock, &recv_hdr,
1939 sizeof(struct lttng_viewer_cmd),
1940 0);
1941 /* connection closed */
1942 if (ret <= 0) {
1943 cleanup_poll_connection(&events, pollfd);
1944 del_connection(relay_connections_ht, &iter,
1945 relay_connection);
1946 DBG("Viewer control connection closed with %d",
1947 pollfd);
1948 } else {
1949 if (relay_connection->session) {
1950 DBG2("Relay viewer worker receiving data for "
1951 "session: %" PRIu64,
1952 relay_connection->session->id);
1953 }
1954 ret = process_control(&recv_hdr, relay_connection,
1955 sessions_ht);
1956 if (ret < 0) {
1957 /* Clear the session on error. */
1958 cleanup_poll_connection(&events, pollfd);
1959 del_connection(relay_connections_ht, &iter,
1960 relay_connection);
1961 DBG("Viewer connection closed with %d", pollfd);
1962 }
1963 }
1964 }
1965 rcu_read_unlock();
1966 }
1967 }
1968 }
1969
1970 exit:
1971 error:
1972 lttng_poll_clean(&events);
1973
1974 /* empty the hash table and free the memory */
1975 rcu_read_lock();
1976 cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) {
1977 health_code_update();
1978
1979 node = lttng_ht_iter_get_node_ulong(&iter);
1980 if (!node) {
1981 continue;
1982 }
1983
1984 relay_connection = caa_container_of(node, struct relay_command,
1985 sock_n);
1986 del_connection(relay_connections_ht, &iter, relay_connection);
1987 }
1988 rcu_read_unlock();
1989 error_poll_create:
1990 lttng_ht_destroy(relay_connections_ht);
1991 relay_connections_ht_error:
1992 /* Close relay cmd pipes */
1993 utils_close_pipe(live_relay_cmd_pipe);
1994 if (err) {
1995 DBG("Viewer worker thread exited with error");
1996 }
1997 DBG("Viewer worker thread cleanup complete");
1998 error_testpoint:
1999 if (err) {
2000 health_error();
2001 ERR("Health error occurred in %s", __func__);
2002 }
2003 health_unregister(health_relayd);
2004 stop_threads();
2005 rcu_unregister_thread();
2006 return NULL;
2007 }
2008
2009 /*
2010 * Create the relay command pipe to wake thread_manage_apps.
2011 * Closed in cleanup().
2012 */
2013 static int create_relay_cmd_pipe(void)
2014 {
2015 int ret;
2016
2017 ret = utils_create_pipe_cloexec(live_relay_cmd_pipe);
2018
2019 return ret;
2020 }
2021
2022 void live_stop_threads(void)
2023 {
2024 int ret;
2025 void *status;
2026
2027 stop_threads();
2028
2029 ret = pthread_join(live_listener_thread, &status);
2030 if (ret != 0) {
2031 PERROR("pthread_join live listener");
2032 goto error; /* join error, exit without cleanup */
2033 }
2034
2035 ret = pthread_join(live_worker_thread, &status);
2036 if (ret != 0) {
2037 PERROR("pthread_join live worker");
2038 goto error; /* join error, exit without cleanup */
2039 }
2040
2041 ret = pthread_join(live_dispatcher_thread, &status);
2042 if (ret != 0) {
2043 PERROR("pthread_join live dispatcher");
2044 goto error; /* join error, exit without cleanup */
2045 }
2046
2047 cleanup();
2048
2049 error:
2050 return;
2051 }
2052
2053 /*
2054 * main
2055 */
2056 int live_start_threads(struct lttng_uri *uri,
2057 struct relay_local_data *relay_ctx)
2058 {
2059 int ret = 0;
2060 void *status;
2061 int is_root;
2062
2063 assert(uri);
2064 live_uri = uri;
2065
2066 /* Check if daemon is UID = 0 */
2067 is_root = !getuid();
2068
2069 if (!is_root) {
2070 if (live_uri->port < 1024) {
2071 ERR("Need to be root to use ports < 1024");
2072 ret = -1;
2073 goto exit;
2074 }
2075 }
2076
2077 /* Setup the thread apps communication pipe. */
2078 if ((ret = create_relay_cmd_pipe()) < 0) {
2079 goto exit;
2080 }
2081
2082 /* Init relay command queue. */
2083 cds_wfq_init(&viewer_cmd_queue.queue);
2084
2085 /* Set up max poll set size */
2086 lttng_poll_set_max_size();
2087
2088 /* Setup the dispatcher thread */
2089 ret = pthread_create(&live_dispatcher_thread, NULL,
2090 thread_dispatcher, (void *) NULL);
2091 if (ret != 0) {
2092 PERROR("pthread_create viewer dispatcher");
2093 goto exit_dispatcher;
2094 }
2095
2096 /* Setup the worker thread */
2097 ret = pthread_create(&live_worker_thread, NULL,
2098 thread_worker, relay_ctx);
2099 if (ret != 0) {
2100 PERROR("pthread_create viewer worker");
2101 goto exit_worker;
2102 }
2103
2104 /* Setup the listener thread */
2105 ret = pthread_create(&live_listener_thread, NULL,
2106 thread_listener, (void *) NULL);
2107 if (ret != 0) {
2108 PERROR("pthread_create viewer listener");
2109 goto exit_listener;
2110 }
2111
2112 ret = 0;
2113 goto end;
2114
2115 exit_listener:
2116 ret = pthread_join(live_listener_thread, &status);
2117 if (ret != 0) {
2118 PERROR("pthread_join live listener");
2119 goto error; /* join error, exit without cleanup */
2120 }
2121
2122 exit_worker:
2123 ret = pthread_join(live_worker_thread, &status);
2124 if (ret != 0) {
2125 PERROR("pthread_join live worker");
2126 goto error; /* join error, exit without cleanup */
2127 }
2128
2129 exit_dispatcher:
2130 ret = pthread_join(live_dispatcher_thread, &status);
2131 if (ret != 0) {
2132 PERROR("pthread_join live dispatcher");
2133 goto error; /* join error, exit without cleanup */
2134 }
2135
2136 exit:
2137 cleanup();
2138
2139 end:
2140 error:
2141 return ret;
2142 }
This page took 0.124658 seconds and 3 git commands to generate.