relayd: add `lttcomm_relayd_command_str()`
[lttng-tools.git] / src / bin / lttng-relayd / live.cpp
1 /*
2 * Copyright (C) 2013 Julien Desfossez <jdesfossez@efficios.com>
3 * Copyright (C) 2013 David Goulet <dgoulet@efficios.com>
4 * Copyright (C) 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 *
6 * SPDX-License-Identifier: GPL-2.0-only
7 *
8 */
9
10 #define _LGPL_SOURCE
11 #include <fcntl.h>
12 #include <getopt.h>
13 #include <grp.h>
14 #include <inttypes.h>
15 #include <limits.h>
16 #include <pthread.h>
17 #include <signal.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/mman.h>
22 #include <sys/mount.h>
23 #include <sys/resource.h>
24 #include <sys/socket.h>
25 #include <sys/stat.h>
26 #include <sys/types.h>
27 #include <sys/wait.h>
28 #include <unistd.h>
29 #include <urcu/futex.h>
30 #include <urcu/rculist.h>
31 #include <urcu/uatomic.h>
32 #include <string>
33
34 #include <common/common.h>
35 #include <common/compat/endian.h>
36 #include <common/compat/poll.h>
37 #include <common/compat/socket.h>
38 #include <common/defaults.h>
39 #include <common/fd-tracker/utils.h>
40 #include <common/fs-handle.h>
41 #include <common/futex.h>
42 #include <common/index/index.h>
43 #include <common/sessiond-comm/inet.h>
44 #include <common/sessiond-comm/relayd.h>
45 #include <common/sessiond-comm/sessiond-comm.h>
46 #include <common/uri.h>
47 #include <common/utils.h>
48 #include <lttng/lttng.h>
49
50 #include "cmd.h"
51 #include "connection.h"
52 #include "ctf-trace.h"
53 #include "health-relayd.h"
54 #include "live.h"
55 #include "lttng-relayd.h"
56 #include "session.h"
57 #include "stream.h"
58 #include "testpoint.h"
59 #include "utils.h"
60 #include "viewer-session.h"
61 #include "viewer-stream.h"
62
/* Initial capacity, in entries, of the buffer built by viewer_list_sessions(). */
#define SESSION_BUF_DEFAULT_COUNT 16

/*
 * URI the live listener socket is created from (see thread_listener()).
 * Freed by cleanup_relayd_live().
 */
static struct lttng_uri *live_uri;

/*
 * This pipe is used to inform the worker thread that a connection is queued
 * and ready to be processed. The dispatcher thread writes connection pointers
 * to the write end (index 1).
 */
static int live_conn_pipe[2] = { -1, -1 };

/*
 * Shared between threads: set by relayd_live_stop() and polled by the
 * dispatcher thread to know when to exit.
 */
static int live_dispatch_thread_exit;

/* Thread handles of the live listener, dispatcher and worker threads. */
static pthread_t live_listener_thread;
static pthread_t live_dispatcher_thread;
static pthread_t live_worker_thread;

/*
 * Viewer connection queue.
 *
 * thread_listener() enqueues newly accepted connections on this queue and
 * thread_dispatcher() dequeues them.
 */
static struct relay_conn_queue viewer_conn_queue;

/*
 * Last viewer session id handed out by viewer_connect(). It is incremented
 * while holding last_relay_viewer_session_id_lock.
 */
static uint64_t last_relay_viewer_session_id;
static pthread_mutex_t last_relay_viewer_session_id_lock =
		PTHREAD_MUTEX_INITIALIZER;
91
92 /*
93 * Cleanup the daemon
94 */
95 static
96 void cleanup_relayd_live(void)
97 {
98 DBG("Cleaning up");
99
100 free(live_uri);
101 }
102
103 /*
104 * Receive a request buffer using a given socket, destination allocated buffer
105 * of length size.
106 *
107 * Return the size of the received message or else a negative value on error
108 * with errno being set by recvmsg() syscall.
109 */
110 static
111 ssize_t recv_request(struct lttcomm_sock *sock, void *buf, size_t size)
112 {
113 ssize_t ret;
114
115 ret = sock->ops->recvmsg(sock, buf, size, 0);
116 if (ret < 0 || ret != size) {
117 if (ret == 0) {
118 /* Orderly shutdown. Not necessary to print an error. */
119 DBG("Socket %d did an orderly shutdown", sock->fd);
120 } else {
121 ERR("Relay failed to receive request.");
122 }
123 ret = -1;
124 }
125
126 return ret;
127 }
128
129 /*
130 * Send a response buffer using a given socket, source allocated buffer of
131 * length size.
132 *
133 * Return the size of the sent message or else a negative value on error with
134 * errno being set by sendmsg() syscall.
135 */
136 static
137 ssize_t send_response(struct lttcomm_sock *sock, void *buf, size_t size)
138 {
139 ssize_t ret;
140
141 ret = sock->ops->sendmsg(sock, buf, size, 0);
142 if (ret < 0) {
143 ERR("Relayd failed to send response.");
144 }
145
146 return ret;
147 }
148
149 /*
150 * Atomically check if new streams got added in one of the sessions attached
151 * and reset the flag to 0.
152 *
153 * Returns 1 if new streams got added, 0 if nothing changed, a negative value
154 * on error.
155 */
156 static
157 int check_new_streams(struct relay_connection *conn)
158 {
159 struct relay_session *session;
160 unsigned long current_val;
161 int ret = 0;
162
163 if (!conn->viewer_session) {
164 goto end;
165 }
166 rcu_read_lock();
167 cds_list_for_each_entry_rcu(session,
168 &conn->viewer_session->session_list,
169 viewer_session_node) {
170 if (!session_get(session)) {
171 continue;
172 }
173 current_val = uatomic_cmpxchg(&session->new_streams, 1, 0);
174 ret = current_val;
175 session_put(session);
176 if (ret == 1) {
177 goto end;
178 }
179 }
180 end:
181 rcu_read_unlock();
182 return ret;
183 }
184
185 /*
186 * Send viewer streams to the given socket. The ignore_sent_flag indicates if
187 * this function should ignore the sent flag or not.
188 *
189 * Return 0 on success or else a negative value.
190 */
191 static
192 ssize_t send_viewer_streams(struct lttcomm_sock *sock,
193 uint64_t session_id, unsigned int ignore_sent_flag)
194 {
195 ssize_t ret;
196 struct lttng_ht_iter iter;
197 struct relay_viewer_stream *vstream;
198
199 rcu_read_lock();
200
201 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, vstream,
202 stream_n.node) {
203 struct ctf_trace *ctf_trace;
204 struct lttng_viewer_stream send_stream = {};
205
206 health_code_update();
207
208 if (!viewer_stream_get(vstream)) {
209 continue;
210 }
211
212 pthread_mutex_lock(&vstream->stream->lock);
213 /* Ignore if not the same session. */
214 if (vstream->stream->trace->session->id != session_id ||
215 (!ignore_sent_flag && vstream->sent_flag)) {
216 pthread_mutex_unlock(&vstream->stream->lock);
217 viewer_stream_put(vstream);
218 continue;
219 }
220
221 ctf_trace = vstream->stream->trace;
222 send_stream.id = htobe64(vstream->stream->stream_handle);
223 send_stream.ctf_trace_id = htobe64(ctf_trace->id);
224 send_stream.metadata_flag = htobe32(
225 vstream->stream->is_metadata);
226 if (lttng_strncpy(send_stream.path_name, vstream->path_name,
227 sizeof(send_stream.path_name))) {
228 pthread_mutex_unlock(&vstream->stream->lock);
229 viewer_stream_put(vstream);
230 ret = -1; /* Error. */
231 goto end_unlock;
232 }
233 if (lttng_strncpy(send_stream.channel_name,
234 vstream->channel_name,
235 sizeof(send_stream.channel_name))) {
236 pthread_mutex_unlock(&vstream->stream->lock);
237 viewer_stream_put(vstream);
238 ret = -1; /* Error. */
239 goto end_unlock;
240 }
241
242 DBG("Sending stream %" PRIu64 " to viewer",
243 vstream->stream->stream_handle);
244 vstream->sent_flag = 1;
245 pthread_mutex_unlock(&vstream->stream->lock);
246
247 ret = send_response(sock, &send_stream, sizeof(send_stream));
248 viewer_stream_put(vstream);
249 if (ret < 0) {
250 goto end_unlock;
251 }
252 }
253
254 ret = 0;
255
256 end_unlock:
257 rcu_read_unlock();
258 return ret;
259 }
260
/*
 * Create every viewer stream possible for the given session with the seek
 * type. Three counters *can* be returned which are, in order, the total
 * number of viewer streams of the session, the number of unsent streams and
 * the number of streams created. Those counters can be NULL and will then be
 * ignored. The counters are only incremented, never reset, by this function.
 *
 * `closed` is set to true when the relay session's connection is closed; it
 * is never set to false here. Unlike the counters, it is dereferenced without
 * a NULL check in that case.
 *
 * The session must be locked to ensure that we see either none or all initial
 * streams for a session, but no intermediate state.
 *
 * Return 0 on success or else a negative value.
 */
static int make_viewer_streams(struct relay_session *relay_session,
		struct relay_viewer_session *viewer_session,
		enum lttng_viewer_seek seek_t,
		uint32_t *nb_total,
		uint32_t *nb_unsent,
		uint32_t *nb_created,
		bool *closed)
{
	int ret;
	struct lttng_ht_iter iter;
	struct ctf_trace *ctf_trace;
	/*
	 * Kept NULL whenever no relay stream is locked and referenced by this
	 * function: the error path relies on it to know whether it must
	 * unlock and put a stream.
	 */
	struct relay_stream *relay_stream = NULL;

	LTTNG_ASSERT(relay_session);
	ASSERT_LOCKED(relay_session->lock);

	if (relay_session->connection_closed) {
		*closed = true;
	}

	/*
	 * Create viewer streams for relay streams that are ready to be
	 * used for the given session only.
	 */
	rcu_read_lock();
	cds_lfht_for_each_entry (relay_session->ctf_traces_ht->ht, &iter.iter,
			ctf_trace, node.node) {
		bool trace_has_metadata_stream = false;

		health_code_update();

		if (!ctf_trace_get(ctf_trace)) {
			continue;
		}

		/*
		 * Iterate over all the streams of the trace to see if we have a
		 * metadata stream.
		 */
		cds_list_for_each_entry_rcu(relay_stream,
				&ctf_trace->stream_list, stream_node)
		{
			bool is_metadata_stream;

			pthread_mutex_lock(&relay_stream->lock);
			is_metadata_stream = relay_stream->is_metadata;
			pthread_mutex_unlock(&relay_stream->lock);

			if (is_metadata_stream) {
				trace_has_metadata_stream = true;
				break;
			}
		}

		/* No stream is held at this point: reset the cursor. */
		relay_stream = NULL;

		/*
		 * If there is no metadata stream in this trace at the moment
		 * and we never sent one to the viewer, skip the trace. We
		 * accept that the viewer will not see this trace at all.
		 */
		if (!trace_has_metadata_stream &&
				!ctf_trace->metadata_stream_sent_to_viewer) {
			ctf_trace_put(ctf_trace);
			continue;
		}

		cds_list_for_each_entry_rcu(relay_stream,
				&ctf_trace->stream_list, stream_node)
		{
			struct relay_viewer_stream *viewer_stream;

			if (!stream_get(relay_stream)) {
				continue;
			}

			pthread_mutex_lock(&relay_stream->lock);
			/*
			 * stream published is protected by the session lock.
			 */
			if (!relay_stream->published) {
				goto next;
			}
			viewer_stream = viewer_stream_get_by_id(
					relay_stream->stream_handle);
			if (!viewer_stream) {
				struct lttng_trace_chunk *viewer_stream_trace_chunk = NULL;

				/*
				 * Save that we sent the metadata stream to the
				 * viewer. So that we know what trace the viewer
				 * is aware of.
				 */
				if (relay_stream->is_metadata) {
					ctf_trace->metadata_stream_sent_to_viewer = true;
				}

				/*
				 * If a rotation is ongoing, use a copy of the
				 * relay stream's chunk to ensure the stream
				 * files exist.
				 *
				 * Otherwise, the viewer session's current trace
				 * chunk can be used safely.
				 */
				if ((relay_stream->ongoing_rotation.is_set ||
						relay_session->ongoing_rotation) &&
						relay_stream->trace_chunk) {
					viewer_stream_trace_chunk = lttng_trace_chunk_copy(
							relay_stream->trace_chunk);
					if (!viewer_stream_trace_chunk) {
						ret = -1;
						ctf_trace_put(ctf_trace);
						goto error_unlock;
					}
				} else {
					/*
					 * Transition the viewer session into the newest trace chunk available.
					 */
					if (!lttng_trace_chunk_ids_equal(viewer_session->current_trace_chunk,
							relay_stream->trace_chunk)) {

						ret = viewer_session_set_trace_chunk_copy(
								viewer_session,
								relay_stream->trace_chunk);
						if (ret) {
							ret = -1;
							ctf_trace_put(ctf_trace);
							goto error_unlock;
						}
					}

					if (relay_stream->trace_chunk) {
						/*
						 * If the corresponding relay
						 * stream's trace chunk is set,
						 * the viewer stream will be
						 * created under it.
						 *
						 * Note that a relay stream can
						 * have a NULL output trace
						 * chunk (for instance, after a
						 * clear against a stopped
						 * session).
						 */
						const bool reference_acquired = lttng_trace_chunk_get(
								viewer_session->current_trace_chunk);

						LTTNG_ASSERT(reference_acquired);
						viewer_stream_trace_chunk =
								viewer_session->current_trace_chunk;
					}
				}

				viewer_stream = viewer_stream_create(
						relay_stream,
						viewer_stream_trace_chunk,
						seek_t);
				/*
				 * Drop the local chunk reference (copy or
				 * acquired above) whether or not the creation
				 * succeeded.
				 */
				lttng_trace_chunk_put(viewer_stream_trace_chunk);
				viewer_stream_trace_chunk = NULL;
				if (!viewer_stream) {
					ret = -1;
					ctf_trace_put(ctf_trace);
					goto error_unlock;
				}

				if (nb_created) {
					/* Update number of created stream counter. */
					(*nb_created)++;
				}
				/*
				 * Ensure a self-reference is preserved even
				 * after we have put our local reference.
				 */
				if (!viewer_stream_get(viewer_stream)) {
					ERR("Unable to get self-reference on viewer stream, logic error.");
					abort();
				}
			} else {
				if (!viewer_stream->sent_flag && nb_unsent) {
					/* Update number of unsent stream counter. */
					(*nb_unsent)++;
				}
			}
			/*
			 * Update number of total stream counter. A stream is
			 * counted while it is not closed; a closed metadata
			 * stream is still counted while more metadata was
			 * received than sent, and a closed data stream while
			 * prev_data_seq is behind last_net_seq_num (signed
			 * difference of the two sequence numbers).
			 */
			if (nb_total) {
				if (relay_stream->is_metadata) {
					if (!relay_stream->closed ||
							relay_stream->metadata_received >
									viewer_stream->metadata_sent) {
						(*nb_total)++;
					}
				} else {
					if (!relay_stream->closed ||
							!(((int64_t)(relay_stream->prev_data_seq -
									relay_stream->last_net_seq_num)) >=
									0)) {
						(*nb_total)++;
					}
				}
			}
			/* Put local reference. */
			viewer_stream_put(viewer_stream);
		next:
			pthread_mutex_unlock(&relay_stream->lock);
			stream_put(relay_stream);
		}
		/* Loop completed: no stream is held anymore. */
		relay_stream = NULL;
		ctf_trace_put(ctf_trace);
	}

	ret = 0;

error_unlock:
	rcu_read_unlock();

	/*
	 * Only non-NULL when an error interrupted the second stream loop
	 * while a stream was locked and referenced.
	 */
	if (relay_stream) {
		pthread_mutex_unlock(&relay_stream->lock);
		stream_put(relay_stream);
	}

	return ret;
}
495
496 int relayd_live_stop(void)
497 {
498 /* Stop dispatch thread */
499 CMM_STORE_SHARED(live_dispatch_thread_exit, 1);
500 futex_nto1_wake(&viewer_conn_queue.futex);
501 return 0;
502 }
503
504 /*
505 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
506 */
507 static
508 int create_named_thread_poll_set(struct lttng_poll_event *events,
509 int size, const char *name)
510 {
511 int ret;
512
513 if (events == NULL || size == 0) {
514 ret = -1;
515 goto error;
516 }
517
518 ret = fd_tracker_util_poll_create(the_fd_tracker,
519 name, events, 1, LTTNG_CLOEXEC);
520 if (ret) {
521 PERROR("Failed to create \"%s\" poll file descriptor", name);
522 goto error;
523 }
524
525 /* Add quit pipe */
526 ret = lttng_poll_add(events, thread_quit_pipe[0], LPOLLIN | LPOLLERR);
527 if (ret < 0) {
528 goto error;
529 }
530
531 return 0;
532
533 error:
534 return ret;
535 }
536
537 /*
538 * Check if the thread quit pipe was triggered.
539 *
540 * Return 1 if it was triggered else 0;
541 */
542 static
543 int check_thread_quit_pipe(int fd, uint32_t events)
544 {
545 if (fd == thread_quit_pipe[0] && (events & LPOLLIN)) {
546 return 1;
547 }
548
549 return 0;
550 }
551
552 static
553 int create_sock(void *data, int *out_fd)
554 {
555 int ret;
556 struct lttcomm_sock *sock = (lttcomm_sock *) data;
557
558 ret = lttcomm_create_sock(sock);
559 if (ret < 0) {
560 goto end;
561 }
562
563 *out_fd = sock->fd;
564 end:
565 return ret;
566 }
567
568 static
569 int close_sock(void *data, int *in_fd)
570 {
571 struct lttcomm_sock *sock = (lttcomm_sock *) data;
572
573 return sock->ops->close(sock);
574 }
575
576 static int accept_sock(void *data, int *out_fd)
577 {
578 int ret = 0;
579 /* Socks is an array of in_sock, out_sock. */
580 struct lttcomm_sock **socks = (lttcomm_sock **) data;
581 struct lttcomm_sock *in_sock = socks[0];
582
583 socks[1] = in_sock->ops->accept(in_sock);
584 if (!socks[1]) {
585 ret = -1;
586 goto end;
587 }
588 *out_fd = socks[1]->fd;
589 end:
590 return ret;
591 }
592
593 static
594 struct lttcomm_sock *accept_live_sock(struct lttcomm_sock *listening_sock,
595 const char *name)
596 {
597 int out_fd, ret;
598 struct lttcomm_sock *socks[2] = { listening_sock, NULL };
599 struct lttcomm_sock *new_sock = NULL;
600
601 ret = fd_tracker_open_unsuspendable_fd(the_fd_tracker, &out_fd,
602 (const char **) &name, 1, accept_sock, &socks);
603 if (ret) {
604 goto end;
605 }
606 new_sock = socks[1];
607 DBG("%s accepted, socket %d", name, new_sock->fd);
608 end:
609 return new_sock;
610 }
611
612 /*
613 * Create and init socket from uri.
614 */
615 static
616 struct lttcomm_sock *init_socket(struct lttng_uri *uri, const char *name)
617 {
618 int ret, sock_fd;
619 struct lttcomm_sock *sock = NULL;
620 char uri_str[LTTNG_PATH_MAX];
621 char *formated_name = NULL;
622
623 sock = lttcomm_alloc_sock_from_uri(uri);
624 if (sock == NULL) {
625 ERR("Allocating socket");
626 goto error;
627 }
628
629 /*
630 * Don't fail to create the socket if the name can't be built as it is
631 * only used for debugging purposes.
632 */
633 ret = uri_to_str_url(uri, uri_str, sizeof(uri_str));
634 uri_str[sizeof(uri_str) - 1] = '\0';
635 if (ret >= 0) {
636 ret = asprintf(&formated_name, "%s socket @ %s", name,
637 uri_str);
638 if (ret < 0) {
639 formated_name = NULL;
640 }
641 }
642
643 ret = fd_tracker_open_unsuspendable_fd(the_fd_tracker, &sock_fd,
644 (const char **) (formated_name ? &formated_name : NULL),
645 1, create_sock, sock);
646 if (ret) {
647 PERROR("Failed to create \"%s\" socket",
648 formated_name ?: "Unknown");
649 goto error;
650 }
651 DBG("Listening on %s socket %d", name, sock->fd);
652
653 ret = sock->ops->bind(sock);
654 if (ret < 0) {
655 PERROR("Failed to bind lttng-live socket");
656 goto error;
657 }
658
659 ret = sock->ops->listen(sock, -1);
660 if (ret < 0) {
661 goto error;
662
663 }
664
665 free(formated_name);
666 return sock;
667
668 error:
669 if (sock) {
670 lttcomm_destroy_sock(sock);
671 }
672 free(formated_name);
673 return NULL;
674 }
675
/*
 * This thread manages the listening for new connections on the network.
 *
 * It creates the live listening socket from live_uri, then waits on a poll
 * set containing that socket and the thread quit pipe. Each accepted
 * connection is wrapped in a relay_connection, enqueued on viewer_conn_queue
 * and the dispatcher thread is woken up.
 *
 * The thread exits without error when the quit pipe is triggered, and with a
 * health error on any other failure. In both cases it asks all relay threads
 * to stop before returning. `data` is unused; NULL is always returned.
 */
static
void *thread_listener(void *data)
{
	/* err stays -1 unless the thread exits through the quit pipe. */
	int i, ret, pollfd, err = -1;
	uint32_t revents, nb_fd;
	struct lttng_poll_event events;
	struct lttcomm_sock *live_control_sock;

	DBG("[thread] Relay live listener started");

	rcu_register_thread();
	health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);

	health_code_update();

	live_control_sock = init_socket(live_uri, "Live listener");
	if (!live_control_sock) {
		goto error_sock_control;
	}

	/* Pass 2 as size here for the thread quit pipe and control sockets. */
	ret = create_named_thread_poll_set(&events, 2,
			"Live listener thread epoll");
	if (ret < 0) {
		goto error_create_poll;
	}

	/* Add the control socket */
	ret = lttng_poll_add(&events, live_control_sock->fd, LPOLLIN | LPOLLRDHUP);
	if (ret < 0) {
		goto error_poll_add;
	}

	lttng_relay_notify_ready();

	if (testpoint(relayd_thread_live_listener)) {
		goto error_testpoint;
	}

	while (1) {
		health_code_update();

		DBG("Listener accepting live viewers connections");

restart:
		health_poll_entry();
		ret = lttng_poll_wait(&events, -1);
		health_poll_exit();
		if (ret < 0) {
			/*
			 * Restart interrupted system call.
			 */
			if (errno == EINTR) {
				goto restart;
			}
			goto error;
		}
		/* Number of entries of the poll set that have pending events. */
		nb_fd = ret;

		DBG("Relay new viewer connection received");
		for (i = 0; i < nb_fd; i++) {
			health_code_update();

			/* Fetch once the poll data */
			revents = LTTNG_POLL_GETEV(&events, i);
			pollfd = LTTNG_POLL_GETFD(&events, i);

			/* Thread quit pipe has been closed. Killing thread. */
			ret = check_thread_quit_pipe(pollfd, revents);
			if (ret) {
				err = 0;
				goto exit;
			}

			if (revents & LPOLLIN) {
				/*
				 * A new connection is requested, therefore a
				 * viewer connection is allocated in this
				 * thread, enqueued to a global queue and
				 * dequeued (and freed) in the worker thread.
				 */
				int val = 1;
				struct relay_connection *new_conn;
				struct lttcomm_sock *newsock;

				newsock = accept_live_sock(live_control_sock,
						"Live socket to client");
				if (!newsock) {
					PERROR("accepting control sock");
					goto error;
				}
				DBG("Relay viewer connection accepted socket %d", newsock->fd);

				ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val,
						sizeof(val));
				if (ret < 0) {
					PERROR("setsockopt inet");
					/*
					 * NOTE(review): newsock's fd was opened
					 * through the fd tracker by
					 * accept_live_sock(); confirm that
					 * destroying the socket here does not
					 * leave that fd open/tracked.
					 */
					lttcomm_destroy_sock(newsock);
					goto error;
				}
				new_conn = connection_create(newsock, RELAY_CONNECTION_UNKNOWN);
				if (!new_conn) {
					/* NOTE(review): same fd concern as above. */
					lttcomm_destroy_sock(newsock);
					goto error;
				}
				/* Ownership assumed by the connection. */
				newsock = NULL;

				/* Enqueue request for the dispatcher thread. */
				cds_wfcq_head_ptr_t head;
				head.h = &viewer_conn_queue.head;
				cds_wfcq_enqueue(head, &viewer_conn_queue.tail,
						&new_conn->qnode);

				/*
				 * Wake the dispatch queue futex.
				 * Implicit memory barrier with the
				 * exchange in cds_wfcq_enqueue.
				 */
				futex_nto1_wake(&viewer_conn_queue.futex);
			} else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
				ERR("socket poll error");
				goto error;
			} else {
				ERR("Unexpected poll events %u for sock %d", revents, pollfd);
				goto error;
			}
		}
	}

	/*
	 * Cleanup labels cascade: each label undoes one more setup step than
	 * the one below it, so a failure jumps to the label matching what was
	 * already set up.
	 */
exit:
error:
error_poll_add:
error_testpoint:
	/* The poll set exists: release it through the fd tracker. */
	(void) fd_tracker_util_poll_clean(the_fd_tracker, &events);
error_create_poll:
	/* Close the listening socket's fd through the fd tracker, then free it. */
	if (live_control_sock->fd >= 0) {
		int sock_fd = live_control_sock->fd;

		ret = fd_tracker_close_unsuspendable_fd(the_fd_tracker,
				&sock_fd, 1, close_sock,
				live_control_sock);
		if (ret) {
			PERROR("close");
		}
		live_control_sock->fd = -1;
	}
	lttcomm_destroy_sock(live_control_sock);
error_sock_control:
	if (err) {
		health_error();
		DBG("Live viewer listener thread exited with error");
	}
	health_unregister(health_relayd);
	rcu_unregister_thread();
	DBG("Live viewer listener thread cleanup complete");
	/* Whatever the exit reason, the other relay threads are asked to stop. */
	if (lttng_relay_stop_threads()) {
		ERR("Error stopping threads");
	}
	return NULL;
}
840
841 /*
842 * This thread manages the dispatching of the requests to worker threads
843 */
844 static
845 void *thread_dispatcher(void *data)
846 {
847 int err = -1;
848 ssize_t ret;
849 struct cds_wfcq_node *node;
850 struct relay_connection *conn = NULL;
851
852 DBG("[thread] Live viewer relay dispatcher started");
853
854 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);
855
856 if (testpoint(relayd_thread_live_dispatcher)) {
857 goto error_testpoint;
858 }
859
860 health_code_update();
861
862 for (;;) {
863 health_code_update();
864
865 /* Atomically prepare the queue futex */
866 futex_nto1_prepare(&viewer_conn_queue.futex);
867
868 if (CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
869 break;
870 }
871
872 do {
873 health_code_update();
874
875 /* Dequeue commands */
876 node = cds_wfcq_dequeue_blocking(&viewer_conn_queue.head,
877 &viewer_conn_queue.tail);
878 if (node == NULL) {
879 DBG("Woken up but nothing in the live-viewer "
880 "relay command queue");
881 /* Continue thread execution */
882 break;
883 }
884 conn = caa_container_of(node, struct relay_connection, qnode);
885 DBG("Dispatching viewer request waiting on sock %d",
886 conn->sock->fd);
887
888 /*
889 * Inform worker thread of the new request. This
890 * call is blocking so we can be assured that
891 * the data will be read at some point in time
892 * or wait to the end of the world :)
893 */
894 ret = lttng_write(live_conn_pipe[1], &conn, sizeof(conn));
895 if (ret < 0) {
896 PERROR("write conn pipe");
897 connection_put(conn);
898 goto error;
899 }
900 } while (node != NULL);
901
902 /* Futex wait on queue. Blocking call on futex() */
903 health_poll_entry();
904 futex_nto1_wait(&viewer_conn_queue.futex);
905 health_poll_exit();
906 }
907
908 /* Normal exit, no error */
909 err = 0;
910
911 error:
912 error_testpoint:
913 if (err) {
914 health_error();
915 ERR("Health error occurred in %s", __func__);
916 }
917 health_unregister(health_relayd);
918 DBG("Live viewer dispatch thread dying");
919 if (lttng_relay_stop_threads()) {
920 ERR("Error stopping threads");
921 }
922 return NULL;
923 }
924
925 /*
926 * Establish connection with the viewer and check the versions.
927 *
928 * Return 0 on success or else negative value.
929 */
930 static
931 int viewer_connect(struct relay_connection *conn)
932 {
933 int ret;
934 struct lttng_viewer_connect reply, msg;
935
936 conn->version_check_done = 1;
937
938 health_code_update();
939
940 DBG("Viewer is establishing a connection to the relayd.");
941
942 ret = recv_request(conn->sock, &msg, sizeof(msg));
943 if (ret < 0) {
944 goto end;
945 }
946
947 health_code_update();
948
949 memset(&reply, 0, sizeof(reply));
950 reply.major = RELAYD_VERSION_COMM_MAJOR;
951 reply.minor = RELAYD_VERSION_COMM_MINOR;
952
953 /* Major versions must be the same */
954 if (reply.major != be32toh(msg.major)) {
955 DBG("Incompatible major versions ([relayd] %u vs [client] %u)",
956 reply.major, be32toh(msg.major));
957 ret = -1;
958 goto end;
959 }
960
961 conn->major = reply.major;
962 /* We adapt to the lowest compatible version */
963 if (reply.minor <= be32toh(msg.minor)) {
964 conn->minor = reply.minor;
965 } else {
966 conn->minor = be32toh(msg.minor);
967 }
968
969 if (be32toh(msg.type) == LTTNG_VIEWER_CLIENT_COMMAND) {
970 conn->type = RELAY_VIEWER_COMMAND;
971 } else if (be32toh(msg.type) == LTTNG_VIEWER_CLIENT_NOTIFICATION) {
972 conn->type = RELAY_VIEWER_NOTIFICATION;
973 } else {
974 ERR("Unknown connection type : %u", be32toh(msg.type));
975 ret = -1;
976 goto end;
977 }
978
979 reply.major = htobe32(reply.major);
980 reply.minor = htobe32(reply.minor);
981 if (conn->type == RELAY_VIEWER_COMMAND) {
982 /*
983 * Increment outside of htobe64 macro, because the argument can
984 * be used more than once within the macro, and thus the
985 * operation may be undefined.
986 */
987 pthread_mutex_lock(&last_relay_viewer_session_id_lock);
988 last_relay_viewer_session_id++;
989 pthread_mutex_unlock(&last_relay_viewer_session_id_lock);
990 reply.viewer_session_id = htobe64(last_relay_viewer_session_id);
991 }
992
993 health_code_update();
994
995 ret = send_response(conn->sock, &reply, sizeof(reply));
996 if (ret < 0) {
997 goto end;
998 }
999
1000 health_code_update();
1001
1002 DBG("Version check done using protocol %u.%u", conn->major, conn->minor);
1003 ret = 0;
1004
1005 end:
1006 return ret;
1007 }
1008
1009 /*
1010 * Send the viewer the list of current sessions.
1011 * We need to create a copy of the hash table content because otherwise
1012 * we cannot assume the number of entries stays the same between getting
1013 * the number of HT elements and iteration over the HT.
1014 *
1015 * Return 0 on success or else a negative value.
1016 */
1017 static
1018 int viewer_list_sessions(struct relay_connection *conn)
1019 {
1020 int ret = 0;
1021 struct lttng_viewer_list_sessions session_list;
1022 struct lttng_ht_iter iter;
1023 struct relay_session *session;
1024 struct lttng_viewer_session *send_session_buf = NULL;
1025 uint32_t buf_count = SESSION_BUF_DEFAULT_COUNT;
1026 uint32_t count = 0;
1027
1028 DBG("List sessions received");
1029
1030 send_session_buf = (lttng_viewer_session *) zmalloc(SESSION_BUF_DEFAULT_COUNT * sizeof(*send_session_buf));
1031 if (!send_session_buf) {
1032 return -1;
1033 }
1034
1035 rcu_read_lock();
1036 cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, session,
1037 session_n.node) {
1038 struct lttng_viewer_session *send_session;
1039
1040 health_code_update();
1041
1042 pthread_mutex_lock(&session->lock);
1043 if (session->connection_closed) {
1044 /* Skip closed session */
1045 goto next_session;
1046 }
1047
1048 if (count >= buf_count) {
1049 struct lttng_viewer_session *newbuf;
1050 uint32_t new_buf_count = buf_count << 1;
1051
1052 newbuf = (lttng_viewer_session *) realloc(send_session_buf,
1053 new_buf_count * sizeof(*send_session_buf));
1054 if (!newbuf) {
1055 ret = -1;
1056 goto break_loop;
1057 }
1058 send_session_buf = newbuf;
1059 buf_count = new_buf_count;
1060 }
1061 send_session = &send_session_buf[count];
1062 if (lttng_strncpy(send_session->session_name,
1063 session->session_name,
1064 sizeof(send_session->session_name))) {
1065 ret = -1;
1066 goto break_loop;
1067 }
1068 if (lttng_strncpy(send_session->hostname, session->hostname,
1069 sizeof(send_session->hostname))) {
1070 ret = -1;
1071 goto break_loop;
1072 }
1073 send_session->id = htobe64(session->id);
1074 send_session->live_timer = htobe32(session->live_timer);
1075 if (session->viewer_attached) {
1076 send_session->clients = htobe32(1);
1077 } else {
1078 send_session->clients = htobe32(0);
1079 }
1080 send_session->streams = htobe32(session->stream_count);
1081 count++;
1082 next_session:
1083 pthread_mutex_unlock(&session->lock);
1084 continue;
1085 break_loop:
1086 pthread_mutex_unlock(&session->lock);
1087 break;
1088 }
1089 rcu_read_unlock();
1090 if (ret < 0) {
1091 goto end_free;
1092 }
1093
1094 session_list.sessions_count = htobe32(count);
1095
1096 health_code_update();
1097
1098 ret = send_response(conn->sock, &session_list, sizeof(session_list));
1099 if (ret < 0) {
1100 goto end_free;
1101 }
1102
1103 health_code_update();
1104
1105 ret = send_response(conn->sock, send_session_buf,
1106 count * sizeof(*send_session_buf));
1107 if (ret < 0) {
1108 goto end_free;
1109 }
1110 health_code_update();
1111
1112 ret = 0;
1113 end_free:
1114 free(send_session_buf);
1115 return ret;
1116 }
1117
/*
 * Handle a viewer's "get new streams" request for a session.
 *
 * The request is received from the connection, viewer streams are created
 * for the session's relay streams, and a response carrying a status and a
 * stream count is sent. When there are created or unsent streams, their
 * descriptions are sent right after the response.
 *
 * An error status is replied when the session does not exist or is not
 * attached to the connection's viewer session. "No new streams" is replied
 * while a session rotation is ongoing, and a HUP status once the session is
 * closed and no stream is counted in the total anymore.
 *
 * Return 0 on success or else a negative value.
 */
static
int viewer_get_new_streams(struct relay_connection *conn)
{
	int ret, send_streams = 0;
	uint32_t nb_created = 0, nb_unsent = 0, nb_streams = 0, nb_total = 0;
	struct lttng_viewer_new_streams_request request;
	struct lttng_viewer_new_streams_response response;
	struct relay_session *session = NULL;
	uint64_t session_id;
	bool closed = false;

	LTTNG_ASSERT(conn);

	DBG("Get new streams received");

	health_code_update();

	/* Receive the request from the connected client. */
	ret = recv_request(conn->sock, &request, sizeof(request));
	if (ret < 0) {
		goto error;
	}
	session_id = be64toh(request.session_id);

	health_code_update();

	memset(&response, 0, sizeof(response));

	session = session_get_by_id(session_id);
	if (!session) {
		DBG("Relay session %" PRIu64 " not found", session_id);
		response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_ERR);
		goto send_reply;
	}

	if (!viewer_session_is_attached(conn->viewer_session, session)) {
		response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_ERR);
		goto send_reply;
	}

	/*
	 * For any new stream, create it with LTTNG_VIEWER_SEEK_BEGINNING since
	 * at this point the client is already attached to the session. Any
	 * initial stream will have been created with the seek type at attach
	 * time (for now most readers use the LTTNG_VIEWER_SEEK_LAST on attach).
	 * Otherwise any event happening in a new stream between the attach and
	 * a call to viewer_get_new_streams will be "lost" (never received) from
	 * the viewer's point of view.
	 */
	pthread_mutex_lock(&session->lock);
	/*
	 * If a session rotation is ongoing, do not attempt to open any
	 * stream, because the chunk can be in an intermediate state
	 * due to directory renaming.
	 */
	if (session->ongoing_rotation) {
		DBG("Relay session %" PRIu64 " rotation ongoing", session_id);
		response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_NO_NEW);
		goto send_reply_unlock;
	}
	ret = make_viewer_streams(session,
			conn->viewer_session,
			LTTNG_VIEWER_SEEK_BEGINNING, &nb_total, &nb_unsent,
			&nb_created, &closed);
	if (ret < 0) {
		/* No reply is sent to the viewer on this path. */
		goto error_unlock_session;
	}
	send_streams = 1;
	response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_OK);

	/* Only send back the newly created streams with the unsent ones. */
	nb_streams = nb_created + nb_unsent;
	response.streams_count = htobe32(nb_streams);

	/*
	 * If the session is closed, HUP when there are no more streams
	 * with data.
	 */
	if (closed && nb_total == 0) {
		send_streams = 0;
		response.streams_count = 0;
		response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_HUP);
		goto send_reply_unlock;
	}
send_reply_unlock:
	pthread_mutex_unlock(&session->lock);

send_reply:
	health_code_update();
	ret = send_response(conn->sock, &response, sizeof(response));
	if (ret < 0) {
		goto end_put_session;
	}
	health_code_update();

	/*
	 * Unknown or empty session, just return gracefully, the viewer
	 * knows what is happening.
	 */
	if (!send_streams || !nb_streams) {
		ret = 0;
		goto end_put_session;
	}

	/*
	 * Send stream and *DON'T* ignore the sent flag so every viewer
	 * streams that were not sent from that point will be sent to
	 * the viewer.
	 */
	ret = send_viewer_streams(conn->sock, session_id, 0);
	if (ret < 0) {
		goto end_put_session;
	}

end_put_session:
	/* session is NULL when the lookup by id failed. */
	if (session) {
		session_put(session);
	}
error:
	return ret;
error_unlock_session:
	/* Stream creation failed while the session lock was held. */
	pthread_mutex_unlock(&session->lock);
	session_put(session);
	return ret;
}
1246
1247 /*
1248 * Send the viewer the list of current sessions.
1249 */
1250 static
1251 int viewer_attach_session(struct relay_connection *conn)
1252 {
1253 int send_streams = 0;
1254 ssize_t ret;
1255 uint32_t nb_streams = 0;
1256 enum lttng_viewer_seek seek_type;
1257 struct lttng_viewer_attach_session_request request;
1258 struct lttng_viewer_attach_session_response response;
1259 struct relay_session *session = NULL;
1260 enum lttng_viewer_attach_return_code viewer_attach_status;
1261 bool closed = false;
1262 uint64_t session_id;
1263
1264 LTTNG_ASSERT(conn);
1265
1266 health_code_update();
1267
1268 /* Receive the request from the connected client. */
1269 ret = recv_request(conn->sock, &request, sizeof(request));
1270 if (ret < 0) {
1271 goto error;
1272 }
1273
1274 session_id = be64toh(request.session_id);
1275 health_code_update();
1276
1277 memset(&response, 0, sizeof(response));
1278
1279 if (!conn->viewer_session) {
1280 DBG("Client trying to attach before creating a live viewer session");
1281 response.status = htobe32(LTTNG_VIEWER_ATTACH_NO_SESSION);
1282 goto send_reply;
1283 }
1284
1285 session = session_get_by_id(session_id);
1286 if (!session) {
1287 DBG("Relay session %" PRIu64 " not found", session_id);
1288 response.status = htobe32(LTTNG_VIEWER_ATTACH_UNK);
1289 goto send_reply;
1290 }
1291 DBG("Attach session ID %" PRIu64 " received", session_id);
1292
1293 pthread_mutex_lock(&session->lock);
1294 if (session->live_timer == 0) {
1295 DBG("Not live session");
1296 response.status = htobe32(LTTNG_VIEWER_ATTACH_NOT_LIVE);
1297 goto send_reply;
1298 }
1299
1300 send_streams = 1;
1301 viewer_attach_status = viewer_session_attach(conn->viewer_session,
1302 session);
1303 if (viewer_attach_status != LTTNG_VIEWER_ATTACH_OK) {
1304 response.status = htobe32(viewer_attach_status);
1305 goto send_reply;
1306 }
1307
1308 switch (be32toh(request.seek)) {
1309 case LTTNG_VIEWER_SEEK_BEGINNING:
1310 case LTTNG_VIEWER_SEEK_LAST:
1311 response.status = htobe32(LTTNG_VIEWER_ATTACH_OK);
1312 seek_type = (lttng_viewer_seek) be32toh(request.seek);
1313 break;
1314 default:
1315 ERR("Wrong seek parameter");
1316 response.status = htobe32(LTTNG_VIEWER_ATTACH_SEEK_ERR);
1317 send_streams = 0;
1318 goto send_reply;
1319 }
1320
1321 /*
1322 * If a session rotation is ongoing, do not attempt to open any
1323 * stream, because the chunk can be in an intermediate state
1324 * due to directory renaming.
1325 */
1326 if (session->ongoing_rotation) {
1327 DBG("Relay session %" PRIu64 " rotation ongoing", session_id);
1328 send_streams = 0;
1329 goto send_reply;
1330 }
1331
1332 ret = make_viewer_streams(session,
1333 conn->viewer_session, seek_type,
1334 &nb_streams, NULL, NULL, &closed);
1335 if (ret < 0) {
1336 goto end_put_session;
1337 }
1338 pthread_mutex_unlock(&session->lock);
1339 session_put(session);
1340 session = NULL;
1341
1342 response.streams_count = htobe32(nb_streams);
1343 /*
1344 * If the session is closed when the viewer is attaching, it
1345 * means some of the streams may have been concurrently removed,
1346 * so we don't allow the viewer to attach, even if there are
1347 * streams available.
1348 */
1349 if (closed) {
1350 send_streams = 0;
1351 response.streams_count = 0;
1352 response.status = htobe32(LTTNG_VIEWER_ATTACH_UNK);
1353 goto send_reply;
1354 }
1355
1356 send_reply:
1357 health_code_update();
1358 ret = send_response(conn->sock, &response, sizeof(response));
1359 if (ret < 0) {
1360 goto end_put_session;
1361 }
1362 health_code_update();
1363
1364 /*
1365 * Unknown or empty session, just return gracefully, the viewer
1366 * knows what is happening.
1367 */
1368 if (!send_streams || !nb_streams) {
1369 ret = 0;
1370 goto end_put_session;
1371 }
1372
1373 /* Send stream and ignore the sent flag. */
1374 ret = send_viewer_streams(conn->sock, session_id, 1);
1375 if (ret < 0) {
1376 goto end_put_session;
1377 }
1378
1379 end_put_session:
1380 if (session) {
1381 pthread_mutex_unlock(&session->lock);
1382 session_put(session);
1383 }
1384 error:
1385 return ret;
1386 }
1387
1388 /*
1389 * Open the index file if needed for the given vstream.
1390 *
1391 * If an index file is successfully opened, the vstream will set it as its
1392 * current index file.
1393 *
1394 * Return 0 on success, a negative value on error (-ENOENT if not ready yet).
1395 *
1396 * Called with rstream lock held.
1397 */
1398 static int try_open_index(struct relay_viewer_stream *vstream,
1399 struct relay_stream *rstream)
1400 {
1401 int ret = 0;
1402 const uint32_t connection_major = rstream->trace->session->major;
1403 const uint32_t connection_minor = rstream->trace->session->minor;
1404 enum lttng_trace_chunk_status chunk_status;
1405
1406 if (vstream->index_file) {
1407 goto end;
1408 }
1409
1410 /*
1411 * First time, we open the index file and at least one index is ready.
1412 */
1413 if (rstream->index_received_seqcount == 0 ||
1414 !vstream->stream_file.trace_chunk) {
1415 ret = -ENOENT;
1416 goto end;
1417 }
1418
1419 chunk_status = lttng_index_file_create_from_trace_chunk_read_only(
1420 vstream->stream_file.trace_chunk, rstream->path_name,
1421 rstream->channel_name, rstream->tracefile_size,
1422 vstream->current_tracefile_id,
1423 lttng_to_index_major(connection_major, connection_minor),
1424 lttng_to_index_minor(connection_major, connection_minor),
1425 true, &vstream->index_file);
1426 if (chunk_status != LTTNG_TRACE_CHUNK_STATUS_OK) {
1427 if (chunk_status == LTTNG_TRACE_CHUNK_STATUS_NO_FILE) {
1428 ret = -ENOENT;
1429 } else {
1430 ret = -1;
1431 }
1432 }
1433
1434 end:
1435 return ret;
1436 }
1437
1438 /*
1439 * Check the status of the index for the given stream. This function
1440 * updates the index structure if needed and can put (close) the vstream
1441 * in the HUP situation.
1442 *
1443 * Return 0 means that we can proceed with the index. A value of 1 means
1444 * that the index has been updated and is ready to be sent to the
1445 * client. A negative value indicates an error that can't be handled.
1446 *
1447 * Called with rstream lock held.
1448 */
1449 static int check_index_status(struct relay_viewer_stream *vstream,
1450 struct relay_stream *rstream, struct ctf_trace *trace,
1451 struct lttng_viewer_index *index)
1452 {
1453 int ret;
1454
1455 DBG("Check index status: index_received_seqcount %" PRIu64 " "
1456 "index_sent_seqcount %" PRIu64 " "
1457 "for stream %" PRIu64,
1458 rstream->index_received_seqcount,
1459 vstream->index_sent_seqcount,
1460 vstream->stream->stream_handle);
1461 if ((trace->session->connection_closed || rstream->closed)
1462 && rstream->index_received_seqcount
1463 == vstream->index_sent_seqcount) {
1464 /*
1465 * Last index sent and session connection or relay
1466 * stream are closed.
1467 */
1468 index->status = htobe32(LTTNG_VIEWER_INDEX_HUP);
1469 goto hup;
1470 } else if (rstream->beacon_ts_end != -1ULL &&
1471 (rstream->index_received_seqcount == 0 ||
1472 (vstream->index_sent_seqcount != 0 &&
1473 rstream->index_received_seqcount
1474 <= vstream->index_sent_seqcount))) {
1475 /*
1476 * We've received a synchronization beacon and the last index
1477 * available has been sent, the index for now is inactive.
1478 *
1479 * In this case, we have received a beacon which allows us to
1480 * inform the client of a time interval during which we can
1481 * guarantee that there are no events to read (and never will
1482 * be).
1483 *
1484 * The sent seqcount can grow higher than receive seqcount on
1485 * clear because the rotation performed by clear will push
1486 * the index_sent_seqcount ahead (see
1487 * viewer_stream_sync_tracefile_array_tail) and skip over
1488 * packet sequence numbers.
1489 */
1490 index->status = htobe32(LTTNG_VIEWER_INDEX_INACTIVE);
1491 index->timestamp_end = htobe64(rstream->beacon_ts_end);
1492 index->stream_id = htobe64(rstream->ctf_stream_id);
1493 DBG("Check index status: inactive with beacon, for stream %" PRIu64,
1494 vstream->stream->stream_handle);
1495 goto index_ready;
1496 } else if (rstream->index_received_seqcount == 0 ||
1497 (vstream->index_sent_seqcount != 0 &&
1498 rstream->index_received_seqcount
1499 <= vstream->index_sent_seqcount)) {
1500 /*
1501 * This checks whether received <= sent seqcount. In
1502 * this case, we have not received a beacon. Therefore,
1503 * we can only ask the client to retry later.
1504 *
1505 * The sent seqcount can grow higher than receive seqcount on
1506 * clear because the rotation performed by clear will push
1507 * the index_sent_seqcount ahead (see
1508 * viewer_stream_sync_tracefile_array_tail) and skip over
1509 * packet sequence numbers.
1510 */
1511 index->status = htobe32(LTTNG_VIEWER_INDEX_RETRY);
1512 DBG("Check index status: retry for stream %" PRIu64,
1513 vstream->stream->stream_handle);
1514 goto index_ready;
1515 } else if (!tracefile_array_seq_in_file(rstream->tfa,
1516 vstream->current_tracefile_id,
1517 vstream->index_sent_seqcount)) {
1518 /*
1519 * The next index we want to send cannot be read either
1520 * because we need to perform a rotation, or due to
1521 * the producer having overwritten its trace file.
1522 */
1523 DBG("Viewer stream %" PRIu64 " rotation",
1524 vstream->stream->stream_handle);
1525 ret = viewer_stream_rotate(vstream);
1526 if (ret == 1) {
1527 /* EOF across entire stream. */
1528 index->status = htobe32(LTTNG_VIEWER_INDEX_HUP);
1529 goto hup;
1530 }
1531 /*
1532 * If we have been pushed due to overwrite, it
1533 * necessarily means there is data that can be read in
1534 * the stream. If we rotated because we reached the end
1535 * of a tracefile, it means the following tracefile
1536 * needs to contain at least one index, else we would
1537 * have already returned LTTNG_VIEWER_INDEX_RETRY to the
1538 * viewer. The updated index_sent_seqcount needs to
1539 * point to a readable index entry now.
1540 *
1541 * In the case where we "rotate" on a single file, we
1542 * can end up in a case where the requested index is
1543 * still unavailable.
1544 */
1545 if (rstream->tracefile_count == 1 &&
1546 !tracefile_array_seq_in_file(
1547 rstream->tfa,
1548 vstream->current_tracefile_id,
1549 vstream->index_sent_seqcount)) {
1550 index->status = htobe32(LTTNG_VIEWER_INDEX_RETRY);
1551 DBG("Check index status: retry: "
1552 "tracefile array sequence number %" PRIu64
1553 " not in file for stream %" PRIu64,
1554 vstream->index_sent_seqcount,
1555 vstream->stream->stream_handle);
1556 goto index_ready;
1557 }
1558 LTTNG_ASSERT(tracefile_array_seq_in_file(rstream->tfa,
1559 vstream->current_tracefile_id,
1560 vstream->index_sent_seqcount));
1561 }
1562 /* ret == 0 means successful so we continue. */
1563 ret = 0;
1564 return ret;
1565
1566 hup:
1567 viewer_stream_put(vstream);
1568 index_ready:
1569 return 1;
1570 }
1571
1572 static
1573 void viewer_stream_rotate_to_trace_chunk(struct relay_viewer_stream *vstream,
1574 struct lttng_trace_chunk *new_trace_chunk)
1575 {
1576 lttng_trace_chunk_put(vstream->stream_file.trace_chunk);
1577
1578 if (new_trace_chunk) {
1579 const bool acquired_reference = lttng_trace_chunk_get(
1580 new_trace_chunk);
1581
1582 LTTNG_ASSERT(acquired_reference);
1583 }
1584
1585 vstream->stream_file.trace_chunk = new_trace_chunk;
1586 viewer_stream_sync_tracefile_array_tail(vstream);
1587 viewer_stream_close_files(vstream);
1588 }
1589
1590 /*
1591 * Send the next index for a stream.
1592 *
1593 * Return 0 on success or else a negative value.
1594 */
1595 static
1596 int viewer_get_next_index(struct relay_connection *conn)
1597 {
1598 int ret;
1599 struct lttng_viewer_get_next_index request_index;
1600 struct lttng_viewer_index viewer_index;
1601 struct ctf_packet_index packet_index;
1602 struct relay_viewer_stream *vstream = NULL;
1603 struct relay_stream *rstream = NULL;
1604 struct ctf_trace *ctf_trace = NULL;
1605 struct relay_viewer_stream *metadata_viewer_stream = NULL;
1606 bool viewer_stream_and_session_in_same_chunk, viewer_stream_one_rotation_behind;
1607 uint64_t stream_file_chunk_id = -1ULL, viewer_session_chunk_id = -1ULL;
1608 enum lttng_trace_chunk_status status;
1609
1610 LTTNG_ASSERT(conn);
1611
1612 DBG("Viewer get next index");
1613
1614 memset(&viewer_index, 0, sizeof(viewer_index));
1615 health_code_update();
1616
1617 ret = recv_request(conn->sock, &request_index, sizeof(request_index));
1618 if (ret < 0) {
1619 goto end;
1620 }
1621 health_code_update();
1622
1623 vstream = viewer_stream_get_by_id(be64toh(request_index.stream_id));
1624 if (!vstream) {
1625 DBG("Client requested index of unknown stream id %" PRIu64,
1626 (uint64_t) be64toh(request_index.stream_id));
1627 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR);
1628 goto send_reply;
1629 }
1630
1631 /* Use back. ref. Protected by refcounts. */
1632 rstream = vstream->stream;
1633 ctf_trace = rstream->trace;
1634
1635 /* metadata_viewer_stream may be NULL. */
1636 metadata_viewer_stream =
1637 ctf_trace_get_viewer_metadata_stream(ctf_trace);
1638
1639 /*
1640 * Hold the session lock to protect against concurrent changes
1641 * to the chunk files (e.g. rename done by clear), which are
1642 * protected by the session ongoing rotation state. Those are
1643 * synchronized with the session lock.
1644 */
1645 pthread_mutex_lock(&rstream->trace->session->lock);
1646 pthread_mutex_lock(&rstream->lock);
1647
1648 /*
1649 * The viewer should not ask for index on metadata stream.
1650 */
1651 if (rstream->is_metadata) {
1652 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_HUP);
1653 goto send_reply;
1654 }
1655
1656 if (rstream->ongoing_rotation.is_set) {
1657 /* Rotation is ongoing, try again later. */
1658 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_RETRY);
1659 goto send_reply;
1660 }
1661
1662 if (rstream->trace->session->ongoing_rotation) {
1663 /* Rotation is ongoing, try again later. */
1664 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_RETRY);
1665 goto send_reply;
1666 }
1667
1668 /*
1669 * Transition the viewer session into the newest trace chunk available.
1670 */
1671 if (!lttng_trace_chunk_ids_equal(
1672 conn->viewer_session->current_trace_chunk,
1673 rstream->trace_chunk)) {
1674 DBG("Relay stream and viewer chunk ids differ");
1675
1676 ret = viewer_session_set_trace_chunk_copy(
1677 conn->viewer_session,
1678 rstream->trace_chunk);
1679 if (ret) {
1680 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR);
1681 goto send_reply;
1682 }
1683 }
1684
1685 /*
1686 * Transition the viewer stream into the latest trace chunk available.
1687 *
1688 * Note that the stream must _not_ rotate in one precise condition:
1689 * the relay stream has rotated to a NULL trace chunk and the viewer
1690 * stream is consuming the trace chunk that was active just before
1691 * that rotation to NULL.
1692 *
1693 * This allows clients to consume all the packets of a trace chunk
1694 * after a session's destruction.
1695 */
1696 if (vstream->stream_file.trace_chunk) {
1697 status = lttng_trace_chunk_get_id(
1698 vstream->stream_file.trace_chunk,
1699 &stream_file_chunk_id);
1700 LTTNG_ASSERT(status == LTTNG_TRACE_CHUNK_STATUS_OK);
1701 }
1702 if (conn->viewer_session->current_trace_chunk) {
1703 status = lttng_trace_chunk_get_id(
1704 conn->viewer_session->current_trace_chunk,
1705 &viewer_session_chunk_id);
1706 LTTNG_ASSERT(status == LTTNG_TRACE_CHUNK_STATUS_OK);
1707 }
1708
1709 viewer_stream_and_session_in_same_chunk = lttng_trace_chunk_ids_equal(
1710 conn->viewer_session->current_trace_chunk,
1711 vstream->stream_file.trace_chunk);
1712 viewer_stream_one_rotation_behind = rstream->completed_rotation_count ==
1713 vstream->last_seen_rotation_count + 1;
1714
1715 if (viewer_stream_and_session_in_same_chunk) {
1716 DBG("Transition to latest chunk check (%s -> %s): Same chunk, no need to rotate",
1717 vstream->stream_file.trace_chunk ?
1718 std::to_string(stream_file_chunk_id).c_str() :
1719 "None",
1720 conn->viewer_session->current_trace_chunk ?
1721 std::to_string(viewer_session_chunk_id).c_str() :
1722 "None");
1723 } else if (viewer_stream_one_rotation_behind && !rstream->trace_chunk) {
1724 DBG("Transition to latest chunk check (%s -> %s): One chunk behind relay stream which is being destroyed, no need to rotate",
1725 vstream->stream_file.trace_chunk ?
1726 std::to_string(stream_file_chunk_id).c_str() :
1727 "None",
1728 conn->viewer_session->current_trace_chunk ?
1729 std::to_string(viewer_session_chunk_id).c_str() :
1730 "None");
1731 } else {
1732 DBG("Transition to latest chunk check (%s -> %s): Viewer stream chunk ID and viewer session chunk ID differ, rotating viewer stream",
1733 vstream->stream_file.trace_chunk ?
1734 std::to_string(stream_file_chunk_id).c_str() :
1735 "None",
1736 conn->viewer_session->current_trace_chunk ?
1737 std::to_string(viewer_session_chunk_id).c_str() :
1738 "None");
1739
1740 viewer_stream_rotate_to_trace_chunk(vstream,
1741 conn->viewer_session->current_trace_chunk);
1742 vstream->last_seen_rotation_count =
1743 rstream->completed_rotation_count;
1744 }
1745
1746 ret = check_index_status(vstream, rstream, ctf_trace, &viewer_index);
1747 if (ret < 0) {
1748 goto error_put;
1749 } else if (ret == 1) {
1750 /*
1751 * We have no index to send and check_index_status has populated
1752 * viewer_index's status.
1753 */
1754 goto send_reply;
1755 }
1756 /* At this point, ret is 0 thus we will be able to read the index. */
1757 LTTNG_ASSERT(!ret);
1758
1759 /* Try to open an index if one is needed for that stream. */
1760 ret = try_open_index(vstream, rstream);
1761 if (ret == -ENOENT) {
1762 if (rstream->closed) {
1763 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_HUP);
1764 goto send_reply;
1765 } else {
1766 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_RETRY);
1767 goto send_reply;
1768 }
1769 }
1770 if (ret < 0) {
1771 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR);
1772 goto send_reply;
1773 }
1774
1775 /*
1776 * vstream->stream_fd may be NULL if it has been closed by
1777 * tracefile rotation, or if we are at the beginning of the
1778 * stream. We open the data stream file here to protect against
1779 * overwrite caused by tracefile rotation (in association with
1780 * unlink performed before overwrite).
1781 */
1782 if (!vstream->stream_file.handle) {
1783 char file_path[LTTNG_PATH_MAX];
1784 struct fs_handle *fs_handle;
1785
1786 ret = utils_stream_file_path(rstream->path_name,
1787 rstream->channel_name, rstream->tracefile_size,
1788 vstream->current_tracefile_id, NULL, file_path,
1789 sizeof(file_path));
1790 if (ret < 0) {
1791 goto error_put;
1792 }
1793
1794 /*
1795 * It is possible the the file we are trying to open is
1796 * missing if the stream has been closed (application exits with
1797 * per-pid buffers) and a clear command has been performed.
1798 */
1799 status = lttng_trace_chunk_open_fs_handle(
1800 vstream->stream_file.trace_chunk,
1801 file_path, O_RDONLY, 0, &fs_handle, true);
1802 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
1803 if (status == LTTNG_TRACE_CHUNK_STATUS_NO_FILE &&
1804 rstream->closed) {
1805 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_HUP);
1806 goto send_reply;
1807 }
1808 PERROR("Failed to open trace file for viewer stream");
1809 goto error_put;
1810 }
1811 vstream->stream_file.handle = fs_handle;
1812 }
1813
1814 ret = check_new_streams(conn);
1815 if (ret < 0) {
1816 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR);
1817 goto send_reply;
1818 } else if (ret == 1) {
1819 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_STREAM;
1820 }
1821
1822 ret = lttng_index_file_read(vstream->index_file, &packet_index);
1823 if (ret) {
1824 ERR("Relay error reading index file");
1825 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR);
1826 goto send_reply;
1827 } else {
1828 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_OK);
1829 vstream->index_sent_seqcount++;
1830 }
1831
1832 /*
1833 * Indexes are stored in big endian, no need to switch before sending.
1834 */
1835 DBG("Sending viewer index for stream %" PRIu64 " offset %" PRIu64,
1836 rstream->stream_handle,
1837 (uint64_t) be64toh(packet_index.offset));
1838 viewer_index.offset = packet_index.offset;
1839 viewer_index.packet_size = packet_index.packet_size;
1840 viewer_index.content_size = packet_index.content_size;
1841 viewer_index.timestamp_begin = packet_index.timestamp_begin;
1842 viewer_index.timestamp_end = packet_index.timestamp_end;
1843 viewer_index.events_discarded = packet_index.events_discarded;
1844 viewer_index.stream_id = packet_index.stream_id;
1845
1846 send_reply:
1847 if (rstream) {
1848 pthread_mutex_unlock(&rstream->lock);
1849 pthread_mutex_unlock(&rstream->trace->session->lock);
1850 }
1851
1852 if (metadata_viewer_stream) {
1853 pthread_mutex_lock(&metadata_viewer_stream->stream->lock);
1854 DBG("get next index metadata check: recv %" PRIu64
1855 " sent %" PRIu64,
1856 metadata_viewer_stream->stream->metadata_received,
1857 metadata_viewer_stream->metadata_sent);
1858 if (!metadata_viewer_stream->stream->metadata_received ||
1859 metadata_viewer_stream->stream->metadata_received >
1860 metadata_viewer_stream->metadata_sent) {
1861 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1862 }
1863 pthread_mutex_unlock(&metadata_viewer_stream->stream->lock);
1864 }
1865
1866 viewer_index.flags = htobe32(viewer_index.flags);
1867 health_code_update();
1868
1869 ret = send_response(conn->sock, &viewer_index, sizeof(viewer_index));
1870 if (ret < 0) {
1871 goto end;
1872 }
1873 health_code_update();
1874
1875 if (vstream) {
1876 DBG("Index %" PRIu64 " for stream %" PRIu64 " sent",
1877 vstream->index_sent_seqcount,
1878 vstream->stream->stream_handle);
1879 }
1880 end:
1881 if (metadata_viewer_stream) {
1882 viewer_stream_put(metadata_viewer_stream);
1883 }
1884 if (vstream) {
1885 viewer_stream_put(vstream);
1886 }
1887 return ret;
1888
1889 error_put:
1890 pthread_mutex_unlock(&rstream->lock);
1891 pthread_mutex_unlock(&rstream->trace->session->lock);
1892 if (metadata_viewer_stream) {
1893 viewer_stream_put(metadata_viewer_stream);
1894 }
1895 viewer_stream_put(vstream);
1896 return ret;
1897 }
1898
1899 /*
1900 * Send the next index for a stream
1901 *
1902 * Return 0 on success or else a negative value.
1903 */
1904 static
1905 int viewer_get_packet(struct relay_connection *conn)
1906 {
1907 int ret;
1908 off_t lseek_ret;
1909 char *reply = NULL;
1910 struct lttng_viewer_get_packet get_packet_info;
1911 struct lttng_viewer_trace_packet reply_header;
1912 struct relay_viewer_stream *vstream = NULL;
1913 uint32_t reply_size = sizeof(reply_header);
1914 uint32_t packet_data_len = 0;
1915 ssize_t read_len;
1916 uint64_t stream_id;
1917
1918 DBG2("Relay get data packet");
1919
1920 health_code_update();
1921
1922 ret = recv_request(conn->sock, &get_packet_info,
1923 sizeof(get_packet_info));
1924 if (ret < 0) {
1925 goto end;
1926 }
1927 health_code_update();
1928
1929 /* From this point on, the error label can be reached. */
1930 memset(&reply_header, 0, sizeof(reply_header));
1931 stream_id = (uint64_t) be64toh(get_packet_info.stream_id);
1932
1933 vstream = viewer_stream_get_by_id(stream_id);
1934 if (!vstream) {
1935 DBG("Client requested packet of unknown stream id %" PRIu64,
1936 stream_id);
1937 reply_header.status = htobe32(LTTNG_VIEWER_GET_PACKET_ERR);
1938 goto send_reply_nolock;
1939 } else {
1940 packet_data_len = be32toh(get_packet_info.len);
1941 reply_size += packet_data_len;
1942 }
1943
1944 reply = (char *) zmalloc(reply_size);
1945 if (!reply) {
1946 PERROR("packet reply zmalloc");
1947 reply_size = sizeof(reply_header);
1948 goto error;
1949 }
1950
1951 pthread_mutex_lock(&vstream->stream->lock);
1952 lseek_ret = fs_handle_seek(vstream->stream_file.handle,
1953 be64toh(get_packet_info.offset), SEEK_SET);
1954 if (lseek_ret < 0) {
1955 PERROR("Failed to seek file system handle of viewer stream %" PRIu64
1956 " to offset %" PRIu64,
1957 stream_id,
1958 (uint64_t) be64toh(get_packet_info.offset));
1959 goto error;
1960 }
1961 read_len = fs_handle_read(vstream->stream_file.handle,
1962 reply + sizeof(reply_header), packet_data_len);
1963 if (read_len < packet_data_len) {
1964 PERROR("Failed to read from file system handle of viewer stream id %" PRIu64
1965 ", offset: %" PRIu64,
1966 stream_id,
1967 (uint64_t) be64toh(get_packet_info.offset));
1968 goto error;
1969 }
1970 reply_header.status = htobe32(LTTNG_VIEWER_GET_PACKET_OK);
1971 reply_header.len = htobe32(packet_data_len);
1972 goto send_reply;
1973
1974 error:
1975 /* No payload to send on error. */
1976 reply_size = sizeof(reply_header);
1977 reply_header.status = htobe32(LTTNG_VIEWER_GET_PACKET_ERR);
1978
1979 send_reply:
1980 if (vstream) {
1981 pthread_mutex_unlock(&vstream->stream->lock);
1982 }
1983 send_reply_nolock:
1984
1985 health_code_update();
1986
1987 if (reply) {
1988 memcpy(reply, &reply_header, sizeof(reply_header));
1989 ret = send_response(conn->sock, reply, reply_size);
1990 } else {
1991 /* No reply to send. */
1992 ret = send_response(conn->sock, &reply_header,
1993 reply_size);
1994 }
1995
1996 health_code_update();
1997 if (ret < 0) {
1998 PERROR("sendmsg of packet data failed");
1999 goto end_free;
2000 }
2001
2002 DBG("Sent %u bytes for stream %" PRIu64, reply_size, stream_id);
2003
2004 end_free:
2005 free(reply);
2006 end:
2007 if (vstream) {
2008 viewer_stream_put(vstream);
2009 }
2010 return ret;
2011 }
2012
2013 /*
2014 * Send the session's metadata
2015 *
2016 * Return 0 on success else a negative value.
2017 */
2018 static
2019 int viewer_get_metadata(struct relay_connection *conn)
2020 {
2021 int ret = 0;
2022 int fd = -1;
2023 ssize_t read_len;
2024 uint64_t len = 0;
2025 char *data = NULL;
2026 struct lttng_viewer_get_metadata request;
2027 struct lttng_viewer_metadata_packet reply;
2028 struct relay_viewer_stream *vstream = NULL;
2029
2030 LTTNG_ASSERT(conn);
2031
2032 DBG("Relay get metadata");
2033
2034 health_code_update();
2035
2036 ret = recv_request(conn->sock, &request, sizeof(request));
2037 if (ret < 0) {
2038 goto end;
2039 }
2040 health_code_update();
2041
2042 memset(&reply, 0, sizeof(reply));
2043
2044 vstream = viewer_stream_get_by_id(be64toh(request.stream_id));
2045 if (!vstream) {
2046 /*
2047 * The metadata stream can be closed by a CLOSE command
2048 * just before we attach. It can also be closed by
2049 * per-pid tracing during tracing. Therefore, it is
2050 * possible that we cannot find this viewer stream.
2051 * Reply back to the client with an error if we cannot
2052 * find it.
2053 */
2054 DBG("Client requested metadata of unknown stream id %" PRIu64,
2055 (uint64_t) be64toh(request.stream_id));
2056 reply.status = htobe32(LTTNG_VIEWER_METADATA_ERR);
2057 goto send_reply;
2058 }
2059 pthread_mutex_lock(&vstream->stream->lock);
2060 if (!vstream->stream->is_metadata) {
2061 ERR("Invalid metadata stream");
2062 goto error;
2063 }
2064
2065 if (vstream->metadata_sent >= vstream->stream->metadata_received) {
2066 /*
2067 * The live viewers expect to receive a NO_NEW_METADATA
2068 * status before a stream disappears, otherwise they abort the
2069 * entire live connection when receiving an error status.
2070 *
2071 * Clear feature resets the metadata_sent to 0 until the
2072 * same metadata is received again.
2073 */
2074 reply.status = htobe32(LTTNG_VIEWER_NO_NEW_METADATA);
2075 /*
2076 * The live viewer considers a closed 0 byte metadata stream as
2077 * an error.
2078 */
2079 if (vstream->metadata_sent > 0) {
2080 if (vstream->stream->closed && vstream->stream->no_new_metadata_notified) {
2081 /* Release ownership for the viewer metadata stream. */
2082 viewer_stream_put(vstream);
2083 }
2084 vstream->stream->no_new_metadata_notified = true;
2085 }
2086 goto send_reply;
2087 }
2088
2089 if (vstream->stream->trace_chunk &&
2090 !lttng_trace_chunk_ids_equal(
2091 conn->viewer_session->current_trace_chunk,
2092 vstream->stream->trace_chunk)) {
2093 /* A rotation has occurred on the relay stream. */
2094 DBG("Metadata relay stream and viewer chunk ids differ");
2095
2096 ret = viewer_session_set_trace_chunk_copy(
2097 conn->viewer_session,
2098 vstream->stream->trace_chunk);
2099 if (ret) {
2100 reply.status = htobe32(LTTNG_VIEWER_METADATA_ERR);
2101 goto send_reply;
2102 }
2103 }
2104
2105 if (conn->viewer_session->current_trace_chunk &&
2106 !lttng_trace_chunk_ids_equal(conn->viewer_session->current_trace_chunk,
2107 vstream->stream_file.trace_chunk)) {
2108 bool acquired_reference;
2109
2110 DBG("Viewer session and viewer stream chunk differ: "
2111 "vsession chunk %p vstream chunk %p",
2112 conn->viewer_session->current_trace_chunk,
2113 vstream->stream_file.trace_chunk);
2114 lttng_trace_chunk_put(vstream->stream_file.trace_chunk);
2115 acquired_reference = lttng_trace_chunk_get(conn->viewer_session->current_trace_chunk);
2116 LTTNG_ASSERT(acquired_reference);
2117 vstream->stream_file.trace_chunk =
2118 conn->viewer_session->current_trace_chunk;
2119 viewer_stream_close_files(vstream);
2120 }
2121
2122 len = vstream->stream->metadata_received - vstream->metadata_sent;
2123
2124 if (!vstream->stream_file.trace_chunk) {
2125 reply.status = htobe32(LTTNG_VIEWER_NO_NEW_METADATA);
2126 len = 0;
2127 goto send_reply;
2128 } else if (vstream->stream_file.trace_chunk &&
2129 !vstream->stream_file.handle && len > 0) {
2130 /*
2131 * Either this is the first time the metadata file is read, or a
2132 * rotation of the corresponding relay stream has occurred.
2133 */
2134 struct fs_handle *fs_handle;
2135 char file_path[LTTNG_PATH_MAX];
2136 enum lttng_trace_chunk_status status;
2137 struct relay_stream *rstream = vstream->stream;
2138
2139 ret = utils_stream_file_path(rstream->path_name,
2140 rstream->channel_name, rstream->tracefile_size,
2141 vstream->current_tracefile_id, NULL, file_path,
2142 sizeof(file_path));
2143 if (ret < 0) {
2144 goto error;
2145 }
2146
2147 /*
2148 * It is possible that the metadata file we are trying to open is
2149 * missing if the stream has been closed (application exits with
2150 * per-pid buffers) and a clear command has been performed.
2151 */
2152 status = lttng_trace_chunk_open_fs_handle(
2153 vstream->stream_file.trace_chunk,
2154 file_path, O_RDONLY, 0, &fs_handle, true);
2155 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
2156 if (status == LTTNG_TRACE_CHUNK_STATUS_NO_FILE) {
2157 reply.status = htobe32(LTTNG_VIEWER_NO_NEW_METADATA);
2158 len = 0;
2159 if (vstream->stream->closed) {
2160 viewer_stream_put(vstream);
2161 }
2162 goto send_reply;
2163 }
2164 PERROR("Failed to open metadata file for viewer stream");
2165 goto error;
2166 }
2167 vstream->stream_file.handle = fs_handle;
2168
2169 if (vstream->metadata_sent != 0) {
2170 /*
2171 * The client does not expect to receive any metadata
2172 * it has received and metadata files in successive
2173 * chunks must be a strict superset of one another.
2174 *
2175 * Skip the first `metadata_sent` bytes to ensure
2176 * they are not sent a second time to the client.
2177 *
2178 * Barring a block layer error or an internal error,
2179 * this seek should not fail as
2180 * `vstream->stream->metadata_received` is reset when
2181 * a relay stream is rotated. If this is reached, it is
2182 * safe to assume that
2183 * `metadata_received` > `metadata_sent`.
2184 */
2185 const off_t seek_ret = fs_handle_seek(fs_handle,
2186 vstream->metadata_sent, SEEK_SET);
2187
2188 if (seek_ret < 0) {
2189 PERROR("Failed to seek metadata viewer stream file to `sent` position: pos = %" PRId64,
2190 vstream->metadata_sent);
2191 reply.status = htobe32(LTTNG_VIEWER_METADATA_ERR);
2192 goto send_reply;
2193 }
2194 }
2195 }
2196
2197 reply.len = htobe64(len);
2198 data = (char *) zmalloc(len);
2199 if (!data) {
2200 PERROR("viewer metadata zmalloc");
2201 goto error;
2202 }
2203
2204 fd = fs_handle_get_fd(vstream->stream_file.handle);
2205 if (fd < 0) {
2206 ERR("Failed to restore viewer stream file system handle");
2207 goto error;
2208 }
2209 read_len = lttng_read(fd, data, len);
2210 fs_handle_put_fd(vstream->stream_file.handle);
2211 fd = -1;
2212 if (read_len < len) {
2213 if (read_len < 0) {
2214 PERROR("Failed to read metadata file");
2215 goto error;
2216 } else {
2217 /*
2218 * A clear has been performed which prevents the relay
2219 * from sending `len` bytes of metadata.
2220 *
2221 * It is important not to send any metadata if we
2222 * couldn't read all the available metadata in one shot:
2223 * sending partial metadata can cause the client to
2224 * attempt to parse an incomplete (incoherent) metadata
2225 * stream, which would result in an error.
2226 */
2227 const off_t seek_ret = fs_handle_seek(
2228 vstream->stream_file.handle, -read_len,
2229 SEEK_CUR);
2230
2231 DBG("Failed to read metadata: requested = %" PRIu64 ", got = %zd",
2232 len, read_len);
2233 read_len = 0;
2234 len = 0;
2235 if (seek_ret < 0) {
2236 PERROR("Failed to restore metadata file position after partial read");
2237 ret = -1;
2238 goto error;
2239 }
2240 }
2241 }
2242 vstream->metadata_sent += read_len;
2243 reply.status = htobe32(LTTNG_VIEWER_METADATA_OK);
2244
2245 goto send_reply;
2246
2247 error:
2248 reply.status = htobe32(LTTNG_VIEWER_METADATA_ERR);
2249
2250 send_reply:
2251 health_code_update();
2252 if (vstream) {
2253 pthread_mutex_unlock(&vstream->stream->lock);
2254 }
2255 ret = send_response(conn->sock, &reply, sizeof(reply));
2256 if (ret < 0) {
2257 goto end_free;
2258 }
2259 health_code_update();
2260
2261 if (len > 0) {
2262 ret = send_response(conn->sock, data, len);
2263 if (ret < 0) {
2264 goto end_free;
2265 }
2266 }
2267
2268 DBG("Sent %" PRIu64 " bytes of metadata for stream %" PRIu64, len,
2269 (uint64_t) be64toh(request.stream_id));
2270
2271 DBG("Metadata sent");
2272
2273 end_free:
2274 free(data);
2275 end:
2276 if (vstream) {
2277 viewer_stream_put(vstream);
2278 }
2279 return ret;
2280 }
2281
2282 /*
2283 * Create a viewer session.
2284 *
2285 * Return 0 on success or else a negative value.
2286 */
2287 static
2288 int viewer_create_session(struct relay_connection *conn)
2289 {
2290 int ret;
2291 struct lttng_viewer_create_session_response resp;
2292
2293 DBG("Viewer create session received");
2294
2295 memset(&resp, 0, sizeof(resp));
2296 resp.status = htobe32(LTTNG_VIEWER_CREATE_SESSION_OK);
2297 conn->viewer_session = viewer_session_create();
2298 if (!conn->viewer_session) {
2299 ERR("Allocation viewer session");
2300 resp.status = htobe32(LTTNG_VIEWER_CREATE_SESSION_ERR);
2301 goto send_reply;
2302 }
2303
2304 send_reply:
2305 health_code_update();
2306 ret = send_response(conn->sock, &resp, sizeof(resp));
2307 if (ret < 0) {
2308 goto end;
2309 }
2310 health_code_update();
2311 ret = 0;
2312
2313 end:
2314 return ret;
2315 }
2316
2317 /*
2318 * Detach a viewer session.
2319 *
2320 * Return 0 on success or else a negative value.
2321 */
2322 static
2323 int viewer_detach_session(struct relay_connection *conn)
2324 {
2325 int ret;
2326 struct lttng_viewer_detach_session_response response;
2327 struct lttng_viewer_detach_session_request request;
2328 struct relay_session *session = NULL;
2329 uint64_t viewer_session_to_close;
2330
2331 DBG("Viewer detach session received");
2332
2333 LTTNG_ASSERT(conn);
2334
2335 health_code_update();
2336
2337 /* Receive the request from the connected client. */
2338 ret = recv_request(conn->sock, &request, sizeof(request));
2339 if (ret < 0) {
2340 goto end;
2341 }
2342 viewer_session_to_close = be64toh(request.session_id);
2343
2344 if (!conn->viewer_session) {
2345 DBG("Client trying to detach before creating a live viewer session");
2346 response.status = htobe32(LTTNG_VIEWER_DETACH_SESSION_ERR);
2347 goto send_reply;
2348 }
2349
2350 health_code_update();
2351
2352 memset(&response, 0, sizeof(response));
2353 DBG("Detaching from session ID %" PRIu64, viewer_session_to_close);
2354
2355 session = session_get_by_id(be64toh(request.session_id));
2356 if (!session) {
2357 DBG("Relay session %" PRIu64 " not found",
2358 (uint64_t) be64toh(request.session_id));
2359 response.status = htobe32(LTTNG_VIEWER_DETACH_SESSION_UNK);
2360 goto send_reply;
2361 }
2362
2363 ret = viewer_session_is_attached(conn->viewer_session, session);
2364 if (ret != 1) {
2365 DBG("Not attached to this session");
2366 response.status = htobe32(LTTNG_VIEWER_DETACH_SESSION_ERR);
2367 goto send_reply_put;
2368 }
2369
2370 viewer_session_close_one_session(conn->viewer_session, session);
2371 response.status = htobe32(LTTNG_VIEWER_DETACH_SESSION_OK);
2372 DBG("Session %" PRIu64 " detached.", viewer_session_to_close);
2373
2374 send_reply_put:
2375 session_put(session);
2376
2377 send_reply:
2378 health_code_update();
2379 ret = send_response(conn->sock, &response, sizeof(response));
2380 if (ret < 0) {
2381 goto end;
2382 }
2383 health_code_update();
2384 ret = 0;
2385
2386 end:
2387 return ret;
2388 }
2389
2390 /*
2391 * live_relay_unknown_command: send -1 if received unknown command
2392 */
2393 static
2394 void live_relay_unknown_command(struct relay_connection *conn)
2395 {
2396 struct lttcomm_relayd_generic_reply reply;
2397
2398 memset(&reply, 0, sizeof(reply));
2399 reply.ret_code = htobe32(LTTNG_ERR_UNK);
2400 (void) send_response(conn->sock, &reply, sizeof(reply));
2401 }
2402
2403 /*
2404 * Process the commands received on the control socket
2405 */
2406 static
2407 int process_control(struct lttng_viewer_cmd *recv_hdr,
2408 struct relay_connection *conn)
2409 {
2410 int ret = 0;
2411 uint32_t msg_value;
2412
2413 msg_value = be32toh(recv_hdr->cmd);
2414
2415 /*
2416 * Make sure we've done the version check before any command other then a
2417 * new client connection.
2418 */
2419 if (msg_value != LTTNG_VIEWER_CONNECT && !conn->version_check_done) {
2420 ERR("Viewer conn value %" PRIu32 " before version check", msg_value);
2421 ret = -1;
2422 goto end;
2423 }
2424
2425 switch (msg_value) {
2426 case LTTNG_VIEWER_CONNECT:
2427 ret = viewer_connect(conn);
2428 break;
2429 case LTTNG_VIEWER_LIST_SESSIONS:
2430 ret = viewer_list_sessions(conn);
2431 break;
2432 case LTTNG_VIEWER_ATTACH_SESSION:
2433 ret = viewer_attach_session(conn);
2434 break;
2435 case LTTNG_VIEWER_GET_NEXT_INDEX:
2436 ret = viewer_get_next_index(conn);
2437 break;
2438 case LTTNG_VIEWER_GET_PACKET:
2439 ret = viewer_get_packet(conn);
2440 break;
2441 case LTTNG_VIEWER_GET_METADATA:
2442 ret = viewer_get_metadata(conn);
2443 break;
2444 case LTTNG_VIEWER_GET_NEW_STREAMS:
2445 ret = viewer_get_new_streams(conn);
2446 break;
2447 case LTTNG_VIEWER_CREATE_SESSION:
2448 ret = viewer_create_session(conn);
2449 break;
2450 case LTTNG_VIEWER_DETACH_SESSION:
2451 ret = viewer_detach_session(conn);
2452 break;
2453 default:
2454 ERR("Received unknown viewer command (%u)",
2455 be32toh(recv_hdr->cmd));
2456 live_relay_unknown_command(conn);
2457 ret = -1;
2458 goto end;
2459 }
2460
2461 end:
2462 return ret;
2463 }
2464
2465 static
2466 void cleanup_connection_pollfd(struct lttng_poll_event *events, int pollfd)
2467 {
2468 int ret;
2469
2470 (void) lttng_poll_del(events, pollfd);
2471
2472 ret = fd_tracker_close_unsuspendable_fd(the_fd_tracker, &pollfd, 1,
2473 fd_tracker_util_close_fd, NULL);
2474 if (ret < 0) {
2475 ERR("Closing pollfd %d", pollfd);
2476 }
2477 }
2478
2479 /*
2480 * This thread does the actual work
2481 */
2482 static
2483 void *thread_worker(void *data)
2484 {
2485 int ret, err = -1;
2486 uint32_t nb_fd;
2487 struct lttng_poll_event events;
2488 struct lttng_ht *viewer_connections_ht;
2489 struct lttng_ht_iter iter;
2490 struct lttng_viewer_cmd recv_hdr;
2491 struct relay_connection *destroy_conn;
2492
2493 DBG("[thread] Live viewer relay worker started");
2494
2495 rcu_register_thread();
2496
2497 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);
2498
2499 if (testpoint(relayd_thread_live_worker)) {
2500 goto error_testpoint;
2501 }
2502
2503 /* table of connections indexed on socket */
2504 viewer_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
2505 if (!viewer_connections_ht) {
2506 goto viewer_connections_ht_error;
2507 }
2508
2509 ret = create_named_thread_poll_set(&events, 2,
2510 "Live viewer worker thread epoll");
2511 if (ret < 0) {
2512 goto error_poll_create;
2513 }
2514
2515 ret = lttng_poll_add(&events, live_conn_pipe[0], LPOLLIN | LPOLLRDHUP);
2516 if (ret < 0) {
2517 goto error;
2518 }
2519
2520 restart:
2521 while (1) {
2522 int i;
2523
2524 health_code_update();
2525
2526 /* Infinite blocking call, waiting for transmission */
2527 DBG3("Relayd live viewer worker thread polling...");
2528 health_poll_entry();
2529 ret = lttng_poll_wait(&events, -1);
2530 health_poll_exit();
2531 if (ret < 0) {
2532 /*
2533 * Restart interrupted system call.
2534 */
2535 if (errno == EINTR) {
2536 goto restart;
2537 }
2538 goto error;
2539 }
2540
2541 nb_fd = ret;
2542
2543 /*
2544 * Process control. The control connection is prioritised so we don't
2545 * starve it with high throughput tracing data on the data
2546 * connection.
2547 */
2548 for (i = 0; i < nb_fd; i++) {
2549 /* Fetch once the poll data */
2550 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
2551 int pollfd = LTTNG_POLL_GETFD(&events, i);
2552
2553 health_code_update();
2554
2555 /* Thread quit pipe has been closed. Killing thread. */
2556 ret = check_thread_quit_pipe(pollfd, revents);
2557 if (ret) {
2558 err = 0;
2559 goto exit;
2560 }
2561
2562 /* Inspect the relay conn pipe for new connection. */
2563 if (pollfd == live_conn_pipe[0]) {
2564 if (revents & LPOLLIN) {
2565 struct relay_connection *conn;
2566
2567 ret = lttng_read(live_conn_pipe[0],
2568 &conn, sizeof(conn));
2569 if (ret < 0) {
2570 goto error;
2571 }
2572 ret = lttng_poll_add(&events,
2573 conn->sock->fd,
2574 LPOLLIN | LPOLLRDHUP);
2575 if (ret) {
2576 ERR("Failed to add new live connection file descriptor to poll set");
2577 goto error;
2578 }
2579 connection_ht_add(viewer_connections_ht, conn);
2580 DBG("Connection socket %d added to poll", conn->sock->fd);
2581 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
2582 ERR("Relay live pipe error");
2583 goto error;
2584 } else {
2585 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
2586 goto error;
2587 }
2588 } else {
2589 /* Connection activity. */
2590 struct relay_connection *conn;
2591
2592 conn = connection_get_by_sock(viewer_connections_ht, pollfd);
2593 if (!conn) {
2594 continue;
2595 }
2596
2597 if (revents & LPOLLIN) {
2598 ret = conn->sock->ops->recvmsg(conn->sock, &recv_hdr,
2599 sizeof(recv_hdr), 0);
2600 if (ret <= 0) {
2601 /* Connection closed. */
2602 cleanup_connection_pollfd(&events, pollfd);
2603 /* Put "create" ownership reference. */
2604 connection_put(conn);
2605 DBG("Viewer control conn closed with %d", pollfd);
2606 } else {
2607 ret = process_control(&recv_hdr, conn);
2608 if (ret < 0) {
2609 /* Clear the session on error. */
2610 cleanup_connection_pollfd(&events, pollfd);
2611 /* Put "create" ownership reference. */
2612 connection_put(conn);
2613 DBG("Viewer connection closed with %d", pollfd);
2614 }
2615 }
2616 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
2617 cleanup_connection_pollfd(&events, pollfd);
2618 /* Put "create" ownership reference. */
2619 connection_put(conn);
2620 } else {
2621 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
2622 connection_put(conn);
2623 goto error;
2624 }
2625 /* Put local "get_by_sock" reference. */
2626 connection_put(conn);
2627 }
2628 }
2629 }
2630
2631 exit:
2632 error:
2633 (void) fd_tracker_util_poll_clean(the_fd_tracker, &events);
2634
2635 /* Cleanup remaining connection object. */
2636 rcu_read_lock();
2637 cds_lfht_for_each_entry(viewer_connections_ht->ht, &iter.iter,
2638 destroy_conn,
2639 sock_n.node) {
2640 health_code_update();
2641 connection_put(destroy_conn);
2642 }
2643 rcu_read_unlock();
2644 error_poll_create:
2645 lttng_ht_destroy(viewer_connections_ht);
2646 viewer_connections_ht_error:
2647 /* Close relay conn pipes */
2648 (void) fd_tracker_util_pipe_close(the_fd_tracker, live_conn_pipe);
2649 if (err) {
2650 DBG("Viewer worker thread exited with error");
2651 }
2652 DBG("Viewer worker thread cleanup complete");
2653 error_testpoint:
2654 if (err) {
2655 health_error();
2656 ERR("Health error occurred in %s", __func__);
2657 }
2658 health_unregister(health_relayd);
2659 if (lttng_relay_stop_threads()) {
2660 ERR("Error stopping threads");
2661 }
2662 rcu_unregister_thread();
2663 return NULL;
2664 }
2665
2666 /*
2667 * Create the relay command pipe to wake thread_manage_apps.
2668 * Closed in cleanup().
2669 */
2670 static int create_conn_pipe(void)
2671 {
2672 return fd_tracker_util_pipe_open_cloexec(the_fd_tracker,
2673 "Live connection pipe", live_conn_pipe);
2674 }
2675
2676 int relayd_live_join(void)
2677 {
2678 int ret, retval = 0;
2679 void *status;
2680
2681 ret = pthread_join(live_listener_thread, &status);
2682 if (ret) {
2683 errno = ret;
2684 PERROR("pthread_join live listener");
2685 retval = -1;
2686 }
2687
2688 ret = pthread_join(live_worker_thread, &status);
2689 if (ret) {
2690 errno = ret;
2691 PERROR("pthread_join live worker");
2692 retval = -1;
2693 }
2694
2695 ret = pthread_join(live_dispatcher_thread, &status);
2696 if (ret) {
2697 errno = ret;
2698 PERROR("pthread_join live dispatcher");
2699 retval = -1;
2700 }
2701
2702 cleanup_relayd_live();
2703
2704 return retval;
2705 }
2706
2707 /*
2708 * main
2709 */
2710 int relayd_live_create(struct lttng_uri *uri)
2711 {
2712 int ret = 0, retval = 0;
2713 void *status;
2714 int is_root;
2715
2716 if (!uri) {
2717 retval = -1;
2718 goto exit_init_data;
2719 }
2720 live_uri = uri;
2721
2722 /* Check if daemon is UID = 0 */
2723 is_root = !getuid();
2724
2725 if (!is_root) {
2726 if (live_uri->port < 1024) {
2727 ERR("Need to be root to use ports < 1024");
2728 retval = -1;
2729 goto exit_init_data;
2730 }
2731 }
2732
2733 /* Setup the thread apps communication pipe. */
2734 if (create_conn_pipe()) {
2735 retval = -1;
2736 goto exit_init_data;
2737 }
2738
2739 /* Init relay command queue. */
2740 cds_wfcq_init(&viewer_conn_queue.head, &viewer_conn_queue.tail);
2741
2742 /* Set up max poll set size */
2743 if (lttng_poll_set_max_size()) {
2744 retval = -1;
2745 goto exit_init_data;
2746 }
2747
2748 /* Setup the dispatcher thread */
2749 ret = pthread_create(&live_dispatcher_thread, default_pthread_attr(),
2750 thread_dispatcher, (void *) NULL);
2751 if (ret) {
2752 errno = ret;
2753 PERROR("pthread_create viewer dispatcher");
2754 retval = -1;
2755 goto exit_dispatcher_thread;
2756 }
2757
2758 /* Setup the worker thread */
2759 ret = pthread_create(&live_worker_thread, default_pthread_attr(),
2760 thread_worker, NULL);
2761 if (ret) {
2762 errno = ret;
2763 PERROR("pthread_create viewer worker");
2764 retval = -1;
2765 goto exit_worker_thread;
2766 }
2767
2768 /* Setup the listener thread */
2769 ret = pthread_create(&live_listener_thread, default_pthread_attr(),
2770 thread_listener, (void *) NULL);
2771 if (ret) {
2772 errno = ret;
2773 PERROR("pthread_create viewer listener");
2774 retval = -1;
2775 goto exit_listener_thread;
2776 }
2777
2778 /*
2779 * All OK, started all threads.
2780 */
2781 return retval;
2782
2783 /*
2784 * Join on the live_listener_thread should anything be added after
2785 * the live_listener thread's creation.
2786 */
2787
2788 exit_listener_thread:
2789
2790 ret = pthread_join(live_worker_thread, &status);
2791 if (ret) {
2792 errno = ret;
2793 PERROR("pthread_join live worker");
2794 retval = -1;
2795 }
2796 exit_worker_thread:
2797
2798 ret = pthread_join(live_dispatcher_thread, &status);
2799 if (ret) {
2800 errno = ret;
2801 PERROR("pthread_join live dispatcher");
2802 retval = -1;
2803 }
2804 exit_dispatcher_thread:
2805
2806 exit_init_data:
2807 cleanup_relayd_live();
2808
2809 return retval;
2810 }
This page took 0.145198 seconds and 4 git commands to generate.