relayd: clarify viewer_get_next_index no rotation condition
[lttng-tools.git] / src / bin / lttng-relayd / live.cpp
1 /*
2 * Copyright (C) 2013 Julien Desfossez <jdesfossez@efficios.com>
3 * Copyright (C) 2013 David Goulet <dgoulet@efficios.com>
4 * Copyright (C) 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 *
6 * SPDX-License-Identifier: GPL-2.0-only
7 *
8 */
9
10 #define _LGPL_SOURCE
11 #include <fcntl.h>
12 #include <getopt.h>
13 #include <grp.h>
14 #include <inttypes.h>
15 #include <limits.h>
16 #include <pthread.h>
17 #include <signal.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/mman.h>
22 #include <sys/mount.h>
23 #include <sys/resource.h>
24 #include <sys/socket.h>
25 #include <sys/stat.h>
26 #include <sys/types.h>
27 #include <sys/wait.h>
28 #include <unistd.h>
29 #include <urcu/futex.h>
30 #include <urcu/rculist.h>
31 #include <urcu/uatomic.h>
32 #include <string>
33
34 #include <common/common.h>
35 #include <common/compat/endian.h>
36 #include <common/compat/poll.h>
37 #include <common/compat/socket.h>
38 #include <common/defaults.h>
39 #include <common/fd-tracker/utils.h>
40 #include <common/fs-handle.h>
41 #include <common/futex.h>
42 #include <common/index/index.h>
43 #include <common/sessiond-comm/inet.h>
44 #include <common/sessiond-comm/relayd.h>
45 #include <common/sessiond-comm/sessiond-comm.h>
46 #include <common/uri.h>
47 #include <common/utils.h>
48 #include <lttng/lttng.h>
49
50 #include "cmd.h"
51 #include "connection.h"
52 #include "ctf-trace.h"
53 #include "health-relayd.h"
54 #include "live.h"
55 #include "lttng-relayd.h"
56 #include "session.h"
57 #include "stream.h"
58 #include "testpoint.h"
59 #include "utils.h"
60 #include "viewer-session.h"
61 #include "viewer-stream.h"
62
63 #define SESSION_BUF_DEFAULT_COUNT 16
64
65 static struct lttng_uri *live_uri;
66
67 /*
68 * This pipe is used to inform the worker thread that a command is queued and
69 * ready to be processed.
70 */
71 static int live_conn_pipe[2] = { -1, -1 };
72
73 /* Shared between threads */
74 static int live_dispatch_thread_exit;
75
76 static pthread_t live_listener_thread;
77 static pthread_t live_dispatcher_thread;
78 static pthread_t live_worker_thread;
79
80 /*
81 * Relay command queue.
82 *
83 * The live_thread_listener and live_thread_dispatcher communicate with this
84 * queue.
85 */
86 static struct relay_conn_queue viewer_conn_queue;
87
88 static uint64_t last_relay_viewer_session_id;
89 static pthread_mutex_t last_relay_viewer_session_id_lock =
90 PTHREAD_MUTEX_INITIALIZER;
91
92 /*
93 * Cleanup the daemon
94 */
95 static
96 void cleanup_relayd_live(void)
97 {
98 DBG("Cleaning up");
99
100 free(live_uri);
101 }
102
103 /*
104 * Receive a request buffer using a given socket, destination allocated buffer
105 * of length size.
106 *
107 * Return the size of the received message or else a negative value on error
108 * with errno being set by recvmsg() syscall.
109 */
110 static
111 ssize_t recv_request(struct lttcomm_sock *sock, void *buf, size_t size)
112 {
113 ssize_t ret;
114
115 ret = sock->ops->recvmsg(sock, buf, size, 0);
116 if (ret < 0 || ret != size) {
117 if (ret == 0) {
118 /* Orderly shutdown. Not necessary to print an error. */
119 DBG("Socket %d did an orderly shutdown", sock->fd);
120 } else {
121 ERR("Relay failed to receive request.");
122 }
123 ret = -1;
124 }
125
126 return ret;
127 }
128
129 /*
130 * Send a response buffer using a given socket, source allocated buffer of
131 * length size.
132 *
133 * Return the size of the sent message or else a negative value on error with
134 * errno being set by sendmsg() syscall.
135 */
136 static
137 ssize_t send_response(struct lttcomm_sock *sock, void *buf, size_t size)
138 {
139 ssize_t ret;
140
141 ret = sock->ops->sendmsg(sock, buf, size, 0);
142 if (ret < 0) {
143 ERR("Relayd failed to send response.");
144 }
145
146 return ret;
147 }
148
149 /*
150 * Atomically check if new streams got added in one of the sessions attached
151 * and reset the flag to 0.
152 *
153 * Returns 1 if new streams got added, 0 if nothing changed, a negative value
154 * on error.
155 */
156 static
157 int check_new_streams(struct relay_connection *conn)
158 {
159 struct relay_session *session;
160 unsigned long current_val;
161 int ret = 0;
162
163 if (!conn->viewer_session) {
164 goto end;
165 }
166 rcu_read_lock();
167 cds_list_for_each_entry_rcu(session,
168 &conn->viewer_session->session_list,
169 viewer_session_node) {
170 if (!session_get(session)) {
171 continue;
172 }
173 current_val = uatomic_cmpxchg(&session->new_streams, 1, 0);
174 ret = current_val;
175 session_put(session);
176 if (ret == 1) {
177 goto end;
178 }
179 }
180 end:
181 rcu_read_unlock();
182 return ret;
183 }
184
185 /*
186 * Send viewer streams to the given socket. The ignore_sent_flag indicates if
187 * this function should ignore the sent flag or not.
188 *
189 * Return 0 on success or else a negative value.
190 */
191 static
192 ssize_t send_viewer_streams(struct lttcomm_sock *sock,
193 uint64_t session_id, unsigned int ignore_sent_flag)
194 {
195 ssize_t ret;
196 struct lttng_ht_iter iter;
197 struct relay_viewer_stream *vstream;
198
199 rcu_read_lock();
200
201 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, vstream,
202 stream_n.node) {
203 struct ctf_trace *ctf_trace;
204 struct lttng_viewer_stream send_stream = {};
205
206 health_code_update();
207
208 if (!viewer_stream_get(vstream)) {
209 continue;
210 }
211
212 pthread_mutex_lock(&vstream->stream->lock);
213 /* Ignore if not the same session. */
214 if (vstream->stream->trace->session->id != session_id ||
215 (!ignore_sent_flag && vstream->sent_flag)) {
216 pthread_mutex_unlock(&vstream->stream->lock);
217 viewer_stream_put(vstream);
218 continue;
219 }
220
221 ctf_trace = vstream->stream->trace;
222 send_stream.id = htobe64(vstream->stream->stream_handle);
223 send_stream.ctf_trace_id = htobe64(ctf_trace->id);
224 send_stream.metadata_flag = htobe32(
225 vstream->stream->is_metadata);
226 if (lttng_strncpy(send_stream.path_name, vstream->path_name,
227 sizeof(send_stream.path_name))) {
228 pthread_mutex_unlock(&vstream->stream->lock);
229 viewer_stream_put(vstream);
230 ret = -1; /* Error. */
231 goto end_unlock;
232 }
233 if (lttng_strncpy(send_stream.channel_name,
234 vstream->channel_name,
235 sizeof(send_stream.channel_name))) {
236 pthread_mutex_unlock(&vstream->stream->lock);
237 viewer_stream_put(vstream);
238 ret = -1; /* Error. */
239 goto end_unlock;
240 }
241
242 DBG("Sending stream %" PRIu64 " to viewer",
243 vstream->stream->stream_handle);
244 vstream->sent_flag = 1;
245 pthread_mutex_unlock(&vstream->stream->lock);
246
247 ret = send_response(sock, &send_stream, sizeof(send_stream));
248 viewer_stream_put(vstream);
249 if (ret < 0) {
250 goto end_unlock;
251 }
252 }
253
254 ret = 0;
255
256 end_unlock:
257 rcu_read_unlock();
258 return ret;
259 }
260
261 /*
262 * Create every viewer stream possible for the given session with the seek
263 * type. Three counters *can* be return which are in order the total amount of
264 * viewer stream of the session, the number of unsent stream and the number of
265 * stream created. Those counters can be NULL and thus will be ignored.
266 *
267 * session must be locked to ensure that we see either none or all initial
268 * streams for a session, but no intermediate state..
269 *
270 * Return 0 on success or else a negative value.
271 */
272 static int make_viewer_streams(struct relay_session *relay_session,
273 struct relay_viewer_session *viewer_session,
274 enum lttng_viewer_seek seek_t,
275 uint32_t *nb_total,
276 uint32_t *nb_unsent,
277 uint32_t *nb_created,
278 bool *closed)
279 {
280 int ret;
281 struct lttng_ht_iter iter;
282 struct ctf_trace *ctf_trace;
283 struct relay_stream *relay_stream = NULL;
284
285 LTTNG_ASSERT(relay_session);
286 ASSERT_LOCKED(relay_session->lock);
287
288 if (relay_session->connection_closed) {
289 *closed = true;
290 }
291
292 /*
293 * Create viewer streams for relay streams that are ready to be
294 * used for a the given session id only.
295 */
296 rcu_read_lock();
297 cds_lfht_for_each_entry (relay_session->ctf_traces_ht->ht, &iter.iter,
298 ctf_trace, node.node) {
299 bool trace_has_metadata_stream = false;
300
301 health_code_update();
302
303 if (!ctf_trace_get(ctf_trace)) {
304 continue;
305 }
306
307 /*
308 * Iterate over all the streams of the trace to see if we have a
309 * metadata stream.
310 */
311 cds_list_for_each_entry_rcu(relay_stream,
312 &ctf_trace->stream_list, stream_node)
313 {
314 bool is_metadata_stream;
315
316 pthread_mutex_lock(&relay_stream->lock);
317 is_metadata_stream = relay_stream->is_metadata;
318 pthread_mutex_unlock(&relay_stream->lock);
319
320 if (is_metadata_stream) {
321 trace_has_metadata_stream = true;
322 break;
323 }
324 }
325
326 relay_stream = NULL;
327
328 /*
329 * If there is no metadata stream in this trace at the moment
330 * and we never sent one to the viewer, skip the trace. We
331 * accept that the viewer will not see this trace at all.
332 */
333 if (!trace_has_metadata_stream &&
334 !ctf_trace->metadata_stream_sent_to_viewer) {
335 ctf_trace_put(ctf_trace);
336 continue;
337 }
338
339 cds_list_for_each_entry_rcu(relay_stream,
340 &ctf_trace->stream_list, stream_node)
341 {
342 struct relay_viewer_stream *viewer_stream;
343
344 if (!stream_get(relay_stream)) {
345 continue;
346 }
347
348 pthread_mutex_lock(&relay_stream->lock);
349 /*
350 * stream published is protected by the session lock.
351 */
352 if (!relay_stream->published) {
353 goto next;
354 }
355 viewer_stream = viewer_stream_get_by_id(
356 relay_stream->stream_handle);
357 if (!viewer_stream) {
358 struct lttng_trace_chunk *viewer_stream_trace_chunk = NULL;
359
360 /*
361 * Save that we sent the metadata stream to the
362 * viewer. So that we know what trace the viewer
363 * is aware of.
364 */
365 if (relay_stream->is_metadata) {
366 ctf_trace->metadata_stream_sent_to_viewer = true;
367 }
368
369 /*
370 * If a rotation is ongoing, use a copy of the
371 * relay stream's chunk to ensure the stream
372 * files exist.
373 *
374 * Otherwise, the viewer session's current trace
375 * chunk can be used safely.
376 */
377 if ((relay_stream->ongoing_rotation.is_set ||
378 relay_session->ongoing_rotation) &&
379 relay_stream->trace_chunk) {
380 viewer_stream_trace_chunk = lttng_trace_chunk_copy(
381 relay_stream->trace_chunk);
382 if (!viewer_stream_trace_chunk) {
383 ret = -1;
384 ctf_trace_put(ctf_trace);
385 goto error_unlock;
386 }
387 } else {
388 /*
389 * Transition the viewer session into the newest trace chunk available.
390 */
391 if (!lttng_trace_chunk_ids_equal(viewer_session->current_trace_chunk,
392 relay_stream->trace_chunk)) {
393
394 ret = viewer_session_set_trace_chunk_copy(
395 viewer_session,
396 relay_stream->trace_chunk);
397 if (ret) {
398 ret = -1;
399 ctf_trace_put(ctf_trace);
400 goto error_unlock;
401 }
402 }
403
404 if (relay_stream->trace_chunk) {
405 /*
406 * If the corresponding relay
407 * stream's trace chunk is set,
408 * the viewer stream will be
409 * created under it.
410 *
411 * Note that a relay stream can
412 * have a NULL output trace
413 * chunk (for instance, after a
414 * clear against a stopped
415 * session).
416 */
417 const bool reference_acquired = lttng_trace_chunk_get(
418 viewer_session->current_trace_chunk);
419
420 LTTNG_ASSERT(reference_acquired);
421 viewer_stream_trace_chunk =
422 viewer_session->current_trace_chunk;
423 }
424 }
425
426 viewer_stream = viewer_stream_create(
427 relay_stream,
428 viewer_stream_trace_chunk,
429 seek_t);
430 lttng_trace_chunk_put(viewer_stream_trace_chunk);
431 viewer_stream_trace_chunk = NULL;
432 if (!viewer_stream) {
433 ret = -1;
434 ctf_trace_put(ctf_trace);
435 goto error_unlock;
436 }
437
438 if (nb_created) {
439 /* Update number of created stream counter. */
440 (*nb_created)++;
441 }
442 /*
443 * Ensure a self-reference is preserved even
444 * after we have put our local reference.
445 */
446 if (!viewer_stream_get(viewer_stream)) {
447 ERR("Unable to get self-reference on viewer stream, logic error.");
448 abort();
449 }
450 } else {
451 if (!viewer_stream->sent_flag && nb_unsent) {
452 /* Update number of unsent stream counter. */
453 (*nb_unsent)++;
454 }
455 }
456 /* Update number of total stream counter. */
457 if (nb_total) {
458 if (relay_stream->is_metadata) {
459 if (!relay_stream->closed ||
460 relay_stream->metadata_received >
461 viewer_stream->metadata_sent) {
462 (*nb_total)++;
463 }
464 } else {
465 if (!relay_stream->closed ||
466 !(((int64_t)(relay_stream->prev_data_seq -
467 relay_stream->last_net_seq_num)) >=
468 0)) {
469 (*nb_total)++;
470 }
471 }
472 }
473 /* Put local reference. */
474 viewer_stream_put(viewer_stream);
475 next:
476 pthread_mutex_unlock(&relay_stream->lock);
477 stream_put(relay_stream);
478 }
479 relay_stream = NULL;
480 ctf_trace_put(ctf_trace);
481 }
482
483 ret = 0;
484
485 error_unlock:
486 rcu_read_unlock();
487
488 if (relay_stream) {
489 pthread_mutex_unlock(&relay_stream->lock);
490 stream_put(relay_stream);
491 }
492
493 return ret;
494 }
495
496 int relayd_live_stop(void)
497 {
498 /* Stop dispatch thread */
499 CMM_STORE_SHARED(live_dispatch_thread_exit, 1);
500 futex_nto1_wake(&viewer_conn_queue.futex);
501 return 0;
502 }
503
504 /*
505 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
506 */
507 static
508 int create_named_thread_poll_set(struct lttng_poll_event *events,
509 int size, const char *name)
510 {
511 int ret;
512
513 if (events == NULL || size == 0) {
514 ret = -1;
515 goto error;
516 }
517
518 ret = fd_tracker_util_poll_create(the_fd_tracker,
519 name, events, 1, LTTNG_CLOEXEC);
520 if (ret) {
521 PERROR("Failed to create \"%s\" poll file descriptor", name);
522 goto error;
523 }
524
525 /* Add quit pipe */
526 ret = lttng_poll_add(events, thread_quit_pipe[0], LPOLLIN | LPOLLERR);
527 if (ret < 0) {
528 goto error;
529 }
530
531 return 0;
532
533 error:
534 return ret;
535 }
536
537 /*
538 * Check if the thread quit pipe was triggered.
539 *
540 * Return 1 if it was triggered else 0;
541 */
542 static
543 int check_thread_quit_pipe(int fd, uint32_t events)
544 {
545 if (fd == thread_quit_pipe[0] && (events & LPOLLIN)) {
546 return 1;
547 }
548
549 return 0;
550 }
551
552 static
553 int create_sock(void *data, int *out_fd)
554 {
555 int ret;
556 struct lttcomm_sock *sock = (lttcomm_sock *) data;
557
558 ret = lttcomm_create_sock(sock);
559 if (ret < 0) {
560 goto end;
561 }
562
563 *out_fd = sock->fd;
564 end:
565 return ret;
566 }
567
568 static
569 int close_sock(void *data, int *in_fd)
570 {
571 struct lttcomm_sock *sock = (lttcomm_sock *) data;
572
573 return sock->ops->close(sock);
574 }
575
576 static int accept_sock(void *data, int *out_fd)
577 {
578 int ret = 0;
579 /* Socks is an array of in_sock, out_sock. */
580 struct lttcomm_sock **socks = (lttcomm_sock **) data;
581 struct lttcomm_sock *in_sock = socks[0];
582
583 socks[1] = in_sock->ops->accept(in_sock);
584 if (!socks[1]) {
585 ret = -1;
586 goto end;
587 }
588 *out_fd = socks[1]->fd;
589 end:
590 return ret;
591 }
592
593 static
594 struct lttcomm_sock *accept_live_sock(struct lttcomm_sock *listening_sock,
595 const char *name)
596 {
597 int out_fd, ret;
598 struct lttcomm_sock *socks[2] = { listening_sock, NULL };
599 struct lttcomm_sock *new_sock = NULL;
600
601 ret = fd_tracker_open_unsuspendable_fd(the_fd_tracker, &out_fd,
602 (const char **) &name, 1, accept_sock, &socks);
603 if (ret) {
604 goto end;
605 }
606 new_sock = socks[1];
607 DBG("%s accepted, socket %d", name, new_sock->fd);
608 end:
609 return new_sock;
610 }
611
612 /*
613 * Create and init socket from uri.
614 */
615 static
616 struct lttcomm_sock *init_socket(struct lttng_uri *uri, const char *name)
617 {
618 int ret, sock_fd;
619 struct lttcomm_sock *sock = NULL;
620 char uri_str[LTTNG_PATH_MAX];
621 char *formated_name = NULL;
622
623 sock = lttcomm_alloc_sock_from_uri(uri);
624 if (sock == NULL) {
625 ERR("Allocating socket");
626 goto error;
627 }
628
629 /*
630 * Don't fail to create the socket if the name can't be built as it is
631 * only used for debugging purposes.
632 */
633 ret = uri_to_str_url(uri, uri_str, sizeof(uri_str));
634 uri_str[sizeof(uri_str) - 1] = '\0';
635 if (ret >= 0) {
636 ret = asprintf(&formated_name, "%s socket @ %s", name,
637 uri_str);
638 if (ret < 0) {
639 formated_name = NULL;
640 }
641 }
642
643 ret = fd_tracker_open_unsuspendable_fd(the_fd_tracker, &sock_fd,
644 (const char **) (formated_name ? &formated_name : NULL),
645 1, create_sock, sock);
646 if (ret) {
647 PERROR("Failed to create \"%s\" socket",
648 formated_name ?: "Unknown");
649 goto error;
650 }
651 DBG("Listening on %s socket %d", name, sock->fd);
652
653 ret = sock->ops->bind(sock);
654 if (ret < 0) {
655 PERROR("Failed to bind lttng-live socket");
656 goto error;
657 }
658
659 ret = sock->ops->listen(sock, -1);
660 if (ret < 0) {
661 goto error;
662
663 }
664
665 free(formated_name);
666 return sock;
667
668 error:
669 if (sock) {
670 lttcomm_destroy_sock(sock);
671 }
672 free(formated_name);
673 return NULL;
674 }
675
676 /*
677 * This thread manages the listening for new connections on the network
678 */
679 static
680 void *thread_listener(void *data)
681 {
682 int i, ret, pollfd, err = -1;
683 uint32_t revents, nb_fd;
684 struct lttng_poll_event events;
685 struct lttcomm_sock *live_control_sock;
686
687 DBG("[thread] Relay live listener started");
688
689 rcu_register_thread();
690 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);
691
692 health_code_update();
693
694 live_control_sock = init_socket(live_uri, "Live listener");
695 if (!live_control_sock) {
696 goto error_sock_control;
697 }
698
699 /* Pass 2 as size here for the thread quit pipe and control sockets. */
700 ret = create_named_thread_poll_set(&events, 2,
701 "Live listener thread epoll");
702 if (ret < 0) {
703 goto error_create_poll;
704 }
705
706 /* Add the control socket */
707 ret = lttng_poll_add(&events, live_control_sock->fd, LPOLLIN | LPOLLRDHUP);
708 if (ret < 0) {
709 goto error_poll_add;
710 }
711
712 lttng_relay_notify_ready();
713
714 if (testpoint(relayd_thread_live_listener)) {
715 goto error_testpoint;
716 }
717
718 while (1) {
719 health_code_update();
720
721 DBG("Listener accepting live viewers connections");
722
723 restart:
724 health_poll_entry();
725 ret = lttng_poll_wait(&events, -1);
726 health_poll_exit();
727 if (ret < 0) {
728 /*
729 * Restart interrupted system call.
730 */
731 if (errno == EINTR) {
732 goto restart;
733 }
734 goto error;
735 }
736 nb_fd = ret;
737
738 DBG("Relay new viewer connection received");
739 for (i = 0; i < nb_fd; i++) {
740 health_code_update();
741
742 /* Fetch once the poll data */
743 revents = LTTNG_POLL_GETEV(&events, i);
744 pollfd = LTTNG_POLL_GETFD(&events, i);
745
746 /* Thread quit pipe has been closed. Killing thread. */
747 ret = check_thread_quit_pipe(pollfd, revents);
748 if (ret) {
749 err = 0;
750 goto exit;
751 }
752
753 if (revents & LPOLLIN) {
754 /*
755 * A new connection is requested, therefore a
756 * viewer connection is allocated in this
757 * thread, enqueued to a global queue and
758 * dequeued (and freed) in the worker thread.
759 */
760 int val = 1;
761 struct relay_connection *new_conn;
762 struct lttcomm_sock *newsock;
763
764 newsock = accept_live_sock(live_control_sock,
765 "Live socket to client");
766 if (!newsock) {
767 PERROR("accepting control sock");
768 goto error;
769 }
770 DBG("Relay viewer connection accepted socket %d", newsock->fd);
771
772 ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val,
773 sizeof(val));
774 if (ret < 0) {
775 PERROR("setsockopt inet");
776 lttcomm_destroy_sock(newsock);
777 goto error;
778 }
779 new_conn = connection_create(newsock, RELAY_CONNECTION_UNKNOWN);
780 if (!new_conn) {
781 lttcomm_destroy_sock(newsock);
782 goto error;
783 }
784 /* Ownership assumed by the connection. */
785 newsock = NULL;
786
787 /* Enqueue request for the dispatcher thread. */
788 cds_wfcq_head_ptr_t head;
789 head.h = &viewer_conn_queue.head;
790 cds_wfcq_enqueue(head, &viewer_conn_queue.tail,
791 &new_conn->qnode);
792
793 /*
794 * Wake the dispatch queue futex.
795 * Implicit memory barrier with the
796 * exchange in cds_wfcq_enqueue.
797 */
798 futex_nto1_wake(&viewer_conn_queue.futex);
799 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
800 ERR("socket poll error");
801 goto error;
802 } else {
803 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
804 goto error;
805 }
806 }
807 }
808
809 exit:
810 error:
811 error_poll_add:
812 error_testpoint:
813 (void) fd_tracker_util_poll_clean(the_fd_tracker, &events);
814 error_create_poll:
815 if (live_control_sock->fd >= 0) {
816 int sock_fd = live_control_sock->fd;
817
818 ret = fd_tracker_close_unsuspendable_fd(the_fd_tracker,
819 &sock_fd, 1, close_sock,
820 live_control_sock);
821 if (ret) {
822 PERROR("close");
823 }
824 live_control_sock->fd = -1;
825 }
826 lttcomm_destroy_sock(live_control_sock);
827 error_sock_control:
828 if (err) {
829 health_error();
830 DBG("Live viewer listener thread exited with error");
831 }
832 health_unregister(health_relayd);
833 rcu_unregister_thread();
834 DBG("Live viewer listener thread cleanup complete");
835 if (lttng_relay_stop_threads()) {
836 ERR("Error stopping threads");
837 }
838 return NULL;
839 }
840
841 /*
842 * This thread manages the dispatching of the requests to worker threads
843 */
844 static
845 void *thread_dispatcher(void *data)
846 {
847 int err = -1;
848 ssize_t ret;
849 struct cds_wfcq_node *node;
850 struct relay_connection *conn = NULL;
851
852 DBG("[thread] Live viewer relay dispatcher started");
853
854 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);
855
856 if (testpoint(relayd_thread_live_dispatcher)) {
857 goto error_testpoint;
858 }
859
860 health_code_update();
861
862 for (;;) {
863 health_code_update();
864
865 /* Atomically prepare the queue futex */
866 futex_nto1_prepare(&viewer_conn_queue.futex);
867
868 if (CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
869 break;
870 }
871
872 do {
873 health_code_update();
874
875 /* Dequeue commands */
876 node = cds_wfcq_dequeue_blocking(&viewer_conn_queue.head,
877 &viewer_conn_queue.tail);
878 if (node == NULL) {
879 DBG("Woken up but nothing in the live-viewer "
880 "relay command queue");
881 /* Continue thread execution */
882 break;
883 }
884 conn = caa_container_of(node, struct relay_connection, qnode);
885 DBG("Dispatching viewer request waiting on sock %d",
886 conn->sock->fd);
887
888 /*
889 * Inform worker thread of the new request. This
890 * call is blocking so we can be assured that
891 * the data will be read at some point in time
892 * or wait to the end of the world :)
893 */
894 ret = lttng_write(live_conn_pipe[1], &conn, sizeof(conn));
895 if (ret < 0) {
896 PERROR("write conn pipe");
897 connection_put(conn);
898 goto error;
899 }
900 } while (node != NULL);
901
902 /* Futex wait on queue. Blocking call on futex() */
903 health_poll_entry();
904 futex_nto1_wait(&viewer_conn_queue.futex);
905 health_poll_exit();
906 }
907
908 /* Normal exit, no error */
909 err = 0;
910
911 error:
912 error_testpoint:
913 if (err) {
914 health_error();
915 ERR("Health error occurred in %s", __func__);
916 }
917 health_unregister(health_relayd);
918 DBG("Live viewer dispatch thread dying");
919 if (lttng_relay_stop_threads()) {
920 ERR("Error stopping threads");
921 }
922 return NULL;
923 }
924
925 /*
926 * Establish connection with the viewer and check the versions.
927 *
928 * Return 0 on success or else negative value.
929 */
930 static
931 int viewer_connect(struct relay_connection *conn)
932 {
933 int ret;
934 struct lttng_viewer_connect reply, msg;
935
936 conn->version_check_done = 1;
937
938 health_code_update();
939
940 DBG("Viewer is establishing a connection to the relayd.");
941
942 ret = recv_request(conn->sock, &msg, sizeof(msg));
943 if (ret < 0) {
944 goto end;
945 }
946
947 health_code_update();
948
949 memset(&reply, 0, sizeof(reply));
950 reply.major = RELAYD_VERSION_COMM_MAJOR;
951 reply.minor = RELAYD_VERSION_COMM_MINOR;
952
953 /* Major versions must be the same */
954 if (reply.major != be32toh(msg.major)) {
955 DBG("Incompatible major versions ([relayd] %u vs [client] %u)",
956 reply.major, be32toh(msg.major));
957 ret = -1;
958 goto end;
959 }
960
961 conn->major = reply.major;
962 /* We adapt to the lowest compatible version */
963 if (reply.minor <= be32toh(msg.minor)) {
964 conn->minor = reply.minor;
965 } else {
966 conn->minor = be32toh(msg.minor);
967 }
968
969 if (be32toh(msg.type) == LTTNG_VIEWER_CLIENT_COMMAND) {
970 conn->type = RELAY_VIEWER_COMMAND;
971 } else if (be32toh(msg.type) == LTTNG_VIEWER_CLIENT_NOTIFICATION) {
972 conn->type = RELAY_VIEWER_NOTIFICATION;
973 } else {
974 ERR("Unknown connection type : %u", be32toh(msg.type));
975 ret = -1;
976 goto end;
977 }
978
979 reply.major = htobe32(reply.major);
980 reply.minor = htobe32(reply.minor);
981 if (conn->type == RELAY_VIEWER_COMMAND) {
982 /*
983 * Increment outside of htobe64 macro, because the argument can
984 * be used more than once within the macro, and thus the
985 * operation may be undefined.
986 */
987 pthread_mutex_lock(&last_relay_viewer_session_id_lock);
988 last_relay_viewer_session_id++;
989 pthread_mutex_unlock(&last_relay_viewer_session_id_lock);
990 reply.viewer_session_id = htobe64(last_relay_viewer_session_id);
991 }
992
993 health_code_update();
994
995 ret = send_response(conn->sock, &reply, sizeof(reply));
996 if (ret < 0) {
997 goto end;
998 }
999
1000 health_code_update();
1001
1002 DBG("Version check done using protocol %u.%u", conn->major, conn->minor);
1003 ret = 0;
1004
1005 end:
1006 return ret;
1007 }
1008
1009 /*
1010 * Send the viewer the list of current sessions.
1011 * We need to create a copy of the hash table content because otherwise
1012 * we cannot assume the number of entries stays the same between getting
1013 * the number of HT elements and iteration over the HT.
1014 *
1015 * Return 0 on success or else a negative value.
1016 */
1017 static
1018 int viewer_list_sessions(struct relay_connection *conn)
1019 {
1020 int ret = 0;
1021 struct lttng_viewer_list_sessions session_list;
1022 struct lttng_ht_iter iter;
1023 struct relay_session *session;
1024 struct lttng_viewer_session *send_session_buf = NULL;
1025 uint32_t buf_count = SESSION_BUF_DEFAULT_COUNT;
1026 uint32_t count = 0;
1027
1028 DBG("List sessions received");
1029
1030 send_session_buf = (lttng_viewer_session *) zmalloc(SESSION_BUF_DEFAULT_COUNT * sizeof(*send_session_buf));
1031 if (!send_session_buf) {
1032 return -1;
1033 }
1034
1035 rcu_read_lock();
1036 cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, session,
1037 session_n.node) {
1038 struct lttng_viewer_session *send_session;
1039
1040 health_code_update();
1041
1042 pthread_mutex_lock(&session->lock);
1043 if (session->connection_closed) {
1044 /* Skip closed session */
1045 goto next_session;
1046 }
1047
1048 if (count >= buf_count) {
1049 struct lttng_viewer_session *newbuf;
1050 uint32_t new_buf_count = buf_count << 1;
1051
1052 newbuf = (lttng_viewer_session *) realloc(send_session_buf,
1053 new_buf_count * sizeof(*send_session_buf));
1054 if (!newbuf) {
1055 ret = -1;
1056 goto break_loop;
1057 }
1058 send_session_buf = newbuf;
1059 buf_count = new_buf_count;
1060 }
1061 send_session = &send_session_buf[count];
1062 if (lttng_strncpy(send_session->session_name,
1063 session->session_name,
1064 sizeof(send_session->session_name))) {
1065 ret = -1;
1066 goto break_loop;
1067 }
1068 if (lttng_strncpy(send_session->hostname, session->hostname,
1069 sizeof(send_session->hostname))) {
1070 ret = -1;
1071 goto break_loop;
1072 }
1073 send_session->id = htobe64(session->id);
1074 send_session->live_timer = htobe32(session->live_timer);
1075 if (session->viewer_attached) {
1076 send_session->clients = htobe32(1);
1077 } else {
1078 send_session->clients = htobe32(0);
1079 }
1080 send_session->streams = htobe32(session->stream_count);
1081 count++;
1082 next_session:
1083 pthread_mutex_unlock(&session->lock);
1084 continue;
1085 break_loop:
1086 pthread_mutex_unlock(&session->lock);
1087 break;
1088 }
1089 rcu_read_unlock();
1090 if (ret < 0) {
1091 goto end_free;
1092 }
1093
1094 session_list.sessions_count = htobe32(count);
1095
1096 health_code_update();
1097
1098 ret = send_response(conn->sock, &session_list, sizeof(session_list));
1099 if (ret < 0) {
1100 goto end_free;
1101 }
1102
1103 health_code_update();
1104
1105 ret = send_response(conn->sock, send_session_buf,
1106 count * sizeof(*send_session_buf));
1107 if (ret < 0) {
1108 goto end_free;
1109 }
1110 health_code_update();
1111
1112 ret = 0;
1113 end_free:
1114 free(send_session_buf);
1115 return ret;
1116 }
1117
1118 /*
1119 * Send the viewer the list of current streams.
1120 */
1121 static
1122 int viewer_get_new_streams(struct relay_connection *conn)
1123 {
1124 int ret, send_streams = 0;
1125 uint32_t nb_created = 0, nb_unsent = 0, nb_streams = 0, nb_total = 0;
1126 struct lttng_viewer_new_streams_request request;
1127 struct lttng_viewer_new_streams_response response;
1128 struct relay_session *session = NULL;
1129 uint64_t session_id;
1130 bool closed = false;
1131
1132 LTTNG_ASSERT(conn);
1133
1134 DBG("Get new streams received");
1135
1136 health_code_update();
1137
1138 /* Receive the request from the connected client. */
1139 ret = recv_request(conn->sock, &request, sizeof(request));
1140 if (ret < 0) {
1141 goto error;
1142 }
1143 session_id = be64toh(request.session_id);
1144
1145 health_code_update();
1146
1147 memset(&response, 0, sizeof(response));
1148
1149 session = session_get_by_id(session_id);
1150 if (!session) {
1151 DBG("Relay session %" PRIu64 " not found", session_id);
1152 response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_ERR);
1153 goto send_reply;
1154 }
1155
1156 if (!viewer_session_is_attached(conn->viewer_session, session)) {
1157 response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_ERR);
1158 goto send_reply;
1159 }
1160
1161 /*
1162 * For any new stream, create it with LTTNG_VIEWER_SEEK_BEGINNING since
1163 * that at this point the client is already attached to the session.Aany
1164 * initial stream will have been created with the seek type at attach
1165 * time (for now most readers use the LTTNG_VIEWER_SEEK_LAST on attach).
1166 * Otherwise any event happening in a new stream between the attach and
1167 * a call to viewer_get_new_streams will be "lost" (never received) from
1168 * the viewer's point of view.
1169 */
1170 pthread_mutex_lock(&session->lock);
1171 /*
1172 * If a session rotation is ongoing, do not attempt to open any
1173 * stream, because the chunk can be in an intermediate state
1174 * due to directory renaming.
1175 */
1176 if (session->ongoing_rotation) {
1177 DBG("Relay session %" PRIu64 " rotation ongoing", session_id);
1178 response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_NO_NEW);
1179 goto send_reply_unlock;
1180 }
1181 ret = make_viewer_streams(session,
1182 conn->viewer_session,
1183 LTTNG_VIEWER_SEEK_BEGINNING, &nb_total, &nb_unsent,
1184 &nb_created, &closed);
1185 if (ret < 0) {
1186 goto error_unlock_session;
1187 }
1188 send_streams = 1;
1189 response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_OK);
1190
1191 /* Only send back the newly created streams with the unsent ones. */
1192 nb_streams = nb_created + nb_unsent;
1193 response.streams_count = htobe32(nb_streams);
1194
1195 /*
1196 * If the session is closed, HUP when there are no more streams
1197 * with data.
1198 */
1199 if (closed && nb_total == 0) {
1200 send_streams = 0;
1201 response.streams_count = 0;
1202 response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_HUP);
1203 goto send_reply_unlock;
1204 }
1205 send_reply_unlock:
1206 pthread_mutex_unlock(&session->lock);
1207
1208 send_reply:
1209 health_code_update();
1210 ret = send_response(conn->sock, &response, sizeof(response));
1211 if (ret < 0) {
1212 goto end_put_session;
1213 }
1214 health_code_update();
1215
1216 /*
1217 * Unknown or empty session, just return gracefully, the viewer
1218 * knows what is happening.
1219 */
1220 if (!send_streams || !nb_streams) {
1221 ret = 0;
1222 goto end_put_session;
1223 }
1224
1225 /*
1226 * Send stream and *DON'T* ignore the sent flag so every viewer
1227 * streams that were not sent from that point will be sent to
1228 * the viewer.
1229 */
1230 ret = send_viewer_streams(conn->sock, session_id, 0);
1231 if (ret < 0) {
1232 goto end_put_session;
1233 }
1234
1235 end_put_session:
1236 if (session) {
1237 session_put(session);
1238 }
1239 error:
1240 return ret;
1241 error_unlock_session:
1242 pthread_mutex_unlock(&session->lock);
1243 session_put(session);
1244 return ret;
1245 }
1246
1247 /*
1248 * Send the viewer the list of current sessions.
1249 */
1250 static
1251 int viewer_attach_session(struct relay_connection *conn)
1252 {
1253 int send_streams = 0;
1254 ssize_t ret;
1255 uint32_t nb_streams = 0;
1256 enum lttng_viewer_seek seek_type;
1257 struct lttng_viewer_attach_session_request request;
1258 struct lttng_viewer_attach_session_response response;
1259 struct relay_session *session = NULL;
1260 enum lttng_viewer_attach_return_code viewer_attach_status;
1261 bool closed = false;
1262 uint64_t session_id;
1263
1264 LTTNG_ASSERT(conn);
1265
1266 health_code_update();
1267
1268 /* Receive the request from the connected client. */
1269 ret = recv_request(conn->sock, &request, sizeof(request));
1270 if (ret < 0) {
1271 goto error;
1272 }
1273
1274 session_id = be64toh(request.session_id);
1275 health_code_update();
1276
1277 memset(&response, 0, sizeof(response));
1278
1279 if (!conn->viewer_session) {
1280 DBG("Client trying to attach before creating a live viewer session");
1281 response.status = htobe32(LTTNG_VIEWER_ATTACH_NO_SESSION);
1282 goto send_reply;
1283 }
1284
1285 session = session_get_by_id(session_id);
1286 if (!session) {
1287 DBG("Relay session %" PRIu64 " not found", session_id);
1288 response.status = htobe32(LTTNG_VIEWER_ATTACH_UNK);
1289 goto send_reply;
1290 }
1291 DBG("Attach session ID %" PRIu64 " received", session_id);
1292
1293 pthread_mutex_lock(&session->lock);
1294 if (session->live_timer == 0) {
1295 DBG("Not live session");
1296 response.status = htobe32(LTTNG_VIEWER_ATTACH_NOT_LIVE);
1297 goto send_reply;
1298 }
1299
1300 send_streams = 1;
1301 viewer_attach_status = viewer_session_attach(conn->viewer_session,
1302 session);
1303 if (viewer_attach_status != LTTNG_VIEWER_ATTACH_OK) {
1304 response.status = htobe32(viewer_attach_status);
1305 goto send_reply;
1306 }
1307
1308 switch (be32toh(request.seek)) {
1309 case LTTNG_VIEWER_SEEK_BEGINNING:
1310 case LTTNG_VIEWER_SEEK_LAST:
1311 response.status = htobe32(LTTNG_VIEWER_ATTACH_OK);
1312 seek_type = (lttng_viewer_seek) be32toh(request.seek);
1313 break;
1314 default:
1315 ERR("Wrong seek parameter");
1316 response.status = htobe32(LTTNG_VIEWER_ATTACH_SEEK_ERR);
1317 send_streams = 0;
1318 goto send_reply;
1319 }
1320
1321 /*
1322 * If a session rotation is ongoing, do not attempt to open any
1323 * stream, because the chunk can be in an intermediate state
1324 * due to directory renaming.
1325 */
1326 if (session->ongoing_rotation) {
1327 DBG("Relay session %" PRIu64 " rotation ongoing", session_id);
1328 send_streams = 0;
1329 goto send_reply;
1330 }
1331
1332 ret = make_viewer_streams(session,
1333 conn->viewer_session, seek_type,
1334 &nb_streams, NULL, NULL, &closed);
1335 if (ret < 0) {
1336 goto end_put_session;
1337 }
1338 pthread_mutex_unlock(&session->lock);
1339 session_put(session);
1340 session = NULL;
1341
1342 response.streams_count = htobe32(nb_streams);
1343 /*
1344 * If the session is closed when the viewer is attaching, it
1345 * means some of the streams may have been concurrently removed,
1346 * so we don't allow the viewer to attach, even if there are
1347 * streams available.
1348 */
1349 if (closed) {
1350 send_streams = 0;
1351 response.streams_count = 0;
1352 response.status = htobe32(LTTNG_VIEWER_ATTACH_UNK);
1353 goto send_reply;
1354 }
1355
1356 send_reply:
1357 health_code_update();
1358 ret = send_response(conn->sock, &response, sizeof(response));
1359 if (ret < 0) {
1360 goto end_put_session;
1361 }
1362 health_code_update();
1363
1364 /*
1365 * Unknown or empty session, just return gracefully, the viewer
1366 * knows what is happening.
1367 */
1368 if (!send_streams || !nb_streams) {
1369 ret = 0;
1370 goto end_put_session;
1371 }
1372
1373 /* Send stream and ignore the sent flag. */
1374 ret = send_viewer_streams(conn->sock, session_id, 1);
1375 if (ret < 0) {
1376 goto end_put_session;
1377 }
1378
1379 end_put_session:
1380 if (session) {
1381 pthread_mutex_unlock(&session->lock);
1382 session_put(session);
1383 }
1384 error:
1385 return ret;
1386 }
1387
1388 /*
1389 * Open the index file if needed for the given vstream.
1390 *
1391 * If an index file is successfully opened, the vstream will set it as its
1392 * current index file.
1393 *
1394 * Return 0 on success, a negative value on error (-ENOENT if not ready yet).
1395 *
1396 * Called with rstream lock held.
1397 */
1398 static int try_open_index(struct relay_viewer_stream *vstream,
1399 struct relay_stream *rstream)
1400 {
1401 int ret = 0;
1402 const uint32_t connection_major = rstream->trace->session->major;
1403 const uint32_t connection_minor = rstream->trace->session->minor;
1404 enum lttng_trace_chunk_status chunk_status;
1405
1406 if (vstream->index_file) {
1407 goto end;
1408 }
1409
1410 /*
1411 * First time, we open the index file and at least one index is ready.
1412 */
1413 if (rstream->index_received_seqcount == 0 ||
1414 !vstream->stream_file.trace_chunk) {
1415 ret = -ENOENT;
1416 goto end;
1417 }
1418
1419 chunk_status = lttng_index_file_create_from_trace_chunk_read_only(
1420 vstream->stream_file.trace_chunk, rstream->path_name,
1421 rstream->channel_name, rstream->tracefile_size,
1422 vstream->current_tracefile_id,
1423 lttng_to_index_major(connection_major, connection_minor),
1424 lttng_to_index_minor(connection_major, connection_minor),
1425 true, &vstream->index_file);
1426 if (chunk_status != LTTNG_TRACE_CHUNK_STATUS_OK) {
1427 if (chunk_status == LTTNG_TRACE_CHUNK_STATUS_NO_FILE) {
1428 ret = -ENOENT;
1429 } else {
1430 ret = -1;
1431 }
1432 }
1433
1434 end:
1435 return ret;
1436 }
1437
1438 /*
1439 * Check the status of the index for the given stream. This function
1440 * updates the index structure if needed and can put (close) the vstream
1441 * in the HUP situation.
1442 *
1443 * Return 0 means that we can proceed with the index. A value of 1 means
1444 * that the index has been updated and is ready to be sent to the
1445 * client. A negative value indicates an error that can't be handled.
1446 *
1447 * Called with rstream lock held.
1448 */
1449 static int check_index_status(struct relay_viewer_stream *vstream,
1450 struct relay_stream *rstream, struct ctf_trace *trace,
1451 struct lttng_viewer_index *index)
1452 {
1453 int ret;
1454
1455 DBG("Check index status: index_received_seqcount %" PRIu64 " "
1456 "index_sent_seqcount %" PRIu64 " "
1457 "for stream %" PRIu64,
1458 rstream->index_received_seqcount,
1459 vstream->index_sent_seqcount,
1460 vstream->stream->stream_handle);
1461 if ((trace->session->connection_closed || rstream->closed)
1462 && rstream->index_received_seqcount
1463 == vstream->index_sent_seqcount) {
1464 /*
1465 * Last index sent and session connection or relay
1466 * stream are closed.
1467 */
1468 index->status = htobe32(LTTNG_VIEWER_INDEX_HUP);
1469 goto hup;
1470 } else if (rstream->beacon_ts_end != -1ULL &&
1471 (rstream->index_received_seqcount == 0 ||
1472 (vstream->index_sent_seqcount != 0 &&
1473 rstream->index_received_seqcount
1474 <= vstream->index_sent_seqcount))) {
1475 /*
1476 * We've received a synchronization beacon and the last index
1477 * available has been sent, the index for now is inactive.
1478 *
1479 * In this case, we have received a beacon which allows us to
1480 * inform the client of a time interval during which we can
1481 * guarantee that there are no events to read (and never will
1482 * be).
1483 *
1484 * The sent seqcount can grow higher than receive seqcount on
1485 * clear because the rotation performed by clear will push
1486 * the index_sent_seqcount ahead (see
1487 * viewer_stream_sync_tracefile_array_tail) and skip over
1488 * packet sequence numbers.
1489 */
1490 index->status = htobe32(LTTNG_VIEWER_INDEX_INACTIVE);
1491 index->timestamp_end = htobe64(rstream->beacon_ts_end);
1492 index->stream_id = htobe64(rstream->ctf_stream_id);
1493 DBG("Check index status: inactive with beacon, for stream %" PRIu64,
1494 vstream->stream->stream_handle);
1495 goto index_ready;
1496 } else if (rstream->index_received_seqcount == 0 ||
1497 (vstream->index_sent_seqcount != 0 &&
1498 rstream->index_received_seqcount
1499 <= vstream->index_sent_seqcount)) {
1500 /*
1501 * This checks whether received <= sent seqcount. In
1502 * this case, we have not received a beacon. Therefore,
1503 * we can only ask the client to retry later.
1504 *
1505 * The sent seqcount can grow higher than receive seqcount on
1506 * clear because the rotation performed by clear will push
1507 * the index_sent_seqcount ahead (see
1508 * viewer_stream_sync_tracefile_array_tail) and skip over
1509 * packet sequence numbers.
1510 */
1511 index->status = htobe32(LTTNG_VIEWER_INDEX_RETRY);
1512 DBG("Check index status: retry for stream %" PRIu64,
1513 vstream->stream->stream_handle);
1514 goto index_ready;
1515 } else if (!tracefile_array_seq_in_file(rstream->tfa,
1516 vstream->current_tracefile_id,
1517 vstream->index_sent_seqcount)) {
1518 /*
1519 * The next index we want to send cannot be read either
1520 * because we need to perform a rotation, or due to
1521 * the producer having overwritten its trace file.
1522 */
1523 DBG("Viewer stream %" PRIu64 " rotation",
1524 vstream->stream->stream_handle);
1525 ret = viewer_stream_rotate(vstream);
1526 if (ret == 1) {
1527 /* EOF across entire stream. */
1528 index->status = htobe32(LTTNG_VIEWER_INDEX_HUP);
1529 goto hup;
1530 }
1531 /*
1532 * If we have been pushed due to overwrite, it
1533 * necessarily means there is data that can be read in
1534 * the stream. If we rotated because we reached the end
1535 * of a tracefile, it means the following tracefile
1536 * needs to contain at least one index, else we would
1537 * have already returned LTTNG_VIEWER_INDEX_RETRY to the
1538 * viewer. The updated index_sent_seqcount needs to
1539 * point to a readable index entry now.
1540 *
1541 * In the case where we "rotate" on a single file, we
1542 * can end up in a case where the requested index is
1543 * still unavailable.
1544 */
1545 if (rstream->tracefile_count == 1 &&
1546 !tracefile_array_seq_in_file(
1547 rstream->tfa,
1548 vstream->current_tracefile_id,
1549 vstream->index_sent_seqcount)) {
1550 index->status = htobe32(LTTNG_VIEWER_INDEX_RETRY);
1551 DBG("Check index status: retry: "
1552 "tracefile array sequence number %" PRIu64
1553 " not in file for stream %" PRIu64,
1554 vstream->index_sent_seqcount,
1555 vstream->stream->stream_handle);
1556 goto index_ready;
1557 }
1558 LTTNG_ASSERT(tracefile_array_seq_in_file(rstream->tfa,
1559 vstream->current_tracefile_id,
1560 vstream->index_sent_seqcount));
1561 }
1562 /* ret == 0 means successful so we continue. */
1563 ret = 0;
1564 return ret;
1565
1566 hup:
1567 viewer_stream_put(vstream);
1568 index_ready:
1569 return 1;
1570 }
1571
1572 static
1573 void viewer_stream_rotate_to_trace_chunk(struct relay_viewer_stream *vstream,
1574 struct lttng_trace_chunk *new_trace_chunk)
1575 {
1576 lttng_trace_chunk_put(vstream->stream_file.trace_chunk);
1577
1578 if (new_trace_chunk) {
1579 const bool acquired_reference = lttng_trace_chunk_get(
1580 new_trace_chunk);
1581
1582 LTTNG_ASSERT(acquired_reference);
1583 }
1584
1585 vstream->stream_file.trace_chunk = new_trace_chunk;
1586 viewer_stream_sync_tracefile_array_tail(vstream);
1587 viewer_stream_close_files(vstream);
1588 }
1589
1590 /*
1591 * Send the next index for a stream.
1592 *
1593 * Return 0 on success or else a negative value.
1594 */
1595 static
1596 int viewer_get_next_index(struct relay_connection *conn)
1597 {
1598 int ret;
1599 struct lttng_viewer_get_next_index request_index;
1600 struct lttng_viewer_index viewer_index;
1601 struct ctf_packet_index packet_index;
1602 struct relay_viewer_stream *vstream = NULL;
1603 struct relay_stream *rstream = NULL;
1604 struct ctf_trace *ctf_trace = NULL;
1605 struct relay_viewer_stream *metadata_viewer_stream = NULL;
1606 bool viewer_stream_and_session_in_same_chunk, viewer_stream_one_rotation_behind;
1607 uint64_t stream_file_chunk_id = -1ULL, viewer_session_chunk_id = -1ULL;
1608 enum lttng_trace_chunk_status status;
1609
1610 LTTNG_ASSERT(conn);
1611
1612 DBG("Viewer get next index");
1613
1614 memset(&viewer_index, 0, sizeof(viewer_index));
1615 health_code_update();
1616
1617 ret = recv_request(conn->sock, &request_index, sizeof(request_index));
1618 if (ret < 0) {
1619 goto end;
1620 }
1621 health_code_update();
1622
1623 vstream = viewer_stream_get_by_id(be64toh(request_index.stream_id));
1624 if (!vstream) {
1625 DBG("Client requested index of unknown stream id %" PRIu64,
1626 (uint64_t) be64toh(request_index.stream_id));
1627 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR);
1628 goto send_reply;
1629 }
1630
1631 /* Use back. ref. Protected by refcounts. */
1632 rstream = vstream->stream;
1633 ctf_trace = rstream->trace;
1634
1635 /* metadata_viewer_stream may be NULL. */
1636 metadata_viewer_stream =
1637 ctf_trace_get_viewer_metadata_stream(ctf_trace);
1638
1639 pthread_mutex_lock(&rstream->lock);
1640
1641 /*
1642 * The viewer should not ask for index on metadata stream.
1643 */
1644 if (rstream->is_metadata) {
1645 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_HUP);
1646 goto send_reply;
1647 }
1648
1649 if (rstream->ongoing_rotation.is_set) {
1650 /* Rotation is ongoing, try again later. */
1651 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_RETRY);
1652 goto send_reply;
1653 }
1654
1655 if (rstream->trace->session->ongoing_rotation) {
1656 /* Rotation is ongoing, try again later. */
1657 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_RETRY);
1658 goto send_reply;
1659 }
1660
1661 /*
1662 * Transition the viewer session into the newest trace chunk available.
1663 */
1664 if (!lttng_trace_chunk_ids_equal(
1665 conn->viewer_session->current_trace_chunk,
1666 rstream->trace_chunk)) {
1667 DBG("Relay stream and viewer chunk ids differ");
1668
1669 ret = viewer_session_set_trace_chunk_copy(
1670 conn->viewer_session,
1671 rstream->trace_chunk);
1672 if (ret) {
1673 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR);
1674 goto send_reply;
1675 }
1676 }
1677
1678 /*
1679 * Transition the viewer stream into the latest trace chunk available.
1680 *
1681 * Note that the stream must _not_ rotate in one precise condition:
1682 * the relay stream has rotated to a NULL trace chunk and the viewer
1683 * stream is consuming the trace chunk that was active just before
1684 * that rotation to NULL.
1685 *
1686 * This allows clients to consume all the packets of a trace chunk
1687 * after a session's destruction.
1688 */
1689 if (vstream->stream_file.trace_chunk) {
1690 status = lttng_trace_chunk_get_id(
1691 vstream->stream_file.trace_chunk,
1692 &stream_file_chunk_id);
1693 LTTNG_ASSERT(status == LTTNG_TRACE_CHUNK_STATUS_OK);
1694 }
1695 if (conn->viewer_session->current_trace_chunk) {
1696 status = lttng_trace_chunk_get_id(
1697 conn->viewer_session->current_trace_chunk,
1698 &viewer_session_chunk_id);
1699 LTTNG_ASSERT(status == LTTNG_TRACE_CHUNK_STATUS_OK);
1700 }
1701
1702 viewer_stream_and_session_in_same_chunk = lttng_trace_chunk_ids_equal(
1703 conn->viewer_session->current_trace_chunk,
1704 vstream->stream_file.trace_chunk);
1705 viewer_stream_one_rotation_behind = rstream->completed_rotation_count ==
1706 vstream->last_seen_rotation_count + 1;
1707
1708 if (viewer_stream_and_session_in_same_chunk) {
1709 DBG("Transition to latest chunk check (%s -> %s): Same chunk, no need to rotate",
1710 vstream->stream_file.trace_chunk ?
1711 std::to_string(stream_file_chunk_id).c_str() :
1712 "None",
1713 conn->viewer_session->current_trace_chunk ?
1714 std::to_string(viewer_session_chunk_id).c_str() :
1715 "None");
1716 } else if (viewer_stream_one_rotation_behind && !rstream->trace_chunk) {
1717 DBG("Transition to latest chunk check (%s -> %s): One chunk behind relay stream which is being destroyed, no need to rotate",
1718 vstream->stream_file.trace_chunk ?
1719 std::to_string(stream_file_chunk_id).c_str() :
1720 "None",
1721 conn->viewer_session->current_trace_chunk ?
1722 std::to_string(viewer_session_chunk_id).c_str() :
1723 "None");
1724 } else {
1725 DBG("Transition to latest chunk check (%s -> %s): Viewer stream chunk ID and viewer session chunk ID differ, rotating viewer stream",
1726 vstream->stream_file.trace_chunk ?
1727 std::to_string(stream_file_chunk_id).c_str() :
1728 "None",
1729 conn->viewer_session->current_trace_chunk ?
1730 std::to_string(viewer_session_chunk_id).c_str() :
1731 "None");
1732
1733 viewer_stream_rotate_to_trace_chunk(vstream,
1734 conn->viewer_session->current_trace_chunk);
1735 vstream->last_seen_rotation_count =
1736 rstream->completed_rotation_count;
1737 }
1738
1739 ret = check_index_status(vstream, rstream, ctf_trace, &viewer_index);
1740 if (ret < 0) {
1741 goto error_put;
1742 } else if (ret == 1) {
1743 /*
1744 * We have no index to send and check_index_status has populated
1745 * viewer_index's status.
1746 */
1747 goto send_reply;
1748 }
1749 /* At this point, ret is 0 thus we will be able to read the index. */
1750 LTTNG_ASSERT(!ret);
1751
1752 /* Try to open an index if one is needed for that stream. */
1753 ret = try_open_index(vstream, rstream);
1754 if (ret == -ENOENT) {
1755 if (rstream->closed) {
1756 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_HUP);
1757 goto send_reply;
1758 } else {
1759 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_RETRY);
1760 goto send_reply;
1761 }
1762 }
1763 if (ret < 0) {
1764 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR);
1765 goto send_reply;
1766 }
1767
1768 /*
1769 * vstream->stream_fd may be NULL if it has been closed by
1770 * tracefile rotation, or if we are at the beginning of the
1771 * stream. We open the data stream file here to protect against
1772 * overwrite caused by tracefile rotation (in association with
1773 * unlink performed before overwrite).
1774 */
1775 if (!vstream->stream_file.handle) {
1776 char file_path[LTTNG_PATH_MAX];
1777 struct fs_handle *fs_handle;
1778
1779 ret = utils_stream_file_path(rstream->path_name,
1780 rstream->channel_name, rstream->tracefile_size,
1781 vstream->current_tracefile_id, NULL, file_path,
1782 sizeof(file_path));
1783 if (ret < 0) {
1784 goto error_put;
1785 }
1786
1787 /*
1788 * It is possible the the file we are trying to open is
1789 * missing if the stream has been closed (application exits with
1790 * per-pid buffers) and a clear command has been performed.
1791 */
1792 status = lttng_trace_chunk_open_fs_handle(
1793 vstream->stream_file.trace_chunk,
1794 file_path, O_RDONLY, 0, &fs_handle, true);
1795 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
1796 if (status == LTTNG_TRACE_CHUNK_STATUS_NO_FILE &&
1797 rstream->closed) {
1798 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_HUP);
1799 goto send_reply;
1800 }
1801 PERROR("Failed to open trace file for viewer stream");
1802 goto error_put;
1803 }
1804 vstream->stream_file.handle = fs_handle;
1805 }
1806
1807 ret = check_new_streams(conn);
1808 if (ret < 0) {
1809 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR);
1810 goto send_reply;
1811 } else if (ret == 1) {
1812 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_STREAM;
1813 }
1814
1815 ret = lttng_index_file_read(vstream->index_file, &packet_index);
1816 if (ret) {
1817 ERR("Relay error reading index file");
1818 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR);
1819 goto send_reply;
1820 } else {
1821 viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_OK);
1822 vstream->index_sent_seqcount++;
1823 }
1824
1825 /*
1826 * Indexes are stored in big endian, no need to switch before sending.
1827 */
1828 DBG("Sending viewer index for stream %" PRIu64 " offset %" PRIu64,
1829 rstream->stream_handle,
1830 (uint64_t) be64toh(packet_index.offset));
1831 viewer_index.offset = packet_index.offset;
1832 viewer_index.packet_size = packet_index.packet_size;
1833 viewer_index.content_size = packet_index.content_size;
1834 viewer_index.timestamp_begin = packet_index.timestamp_begin;
1835 viewer_index.timestamp_end = packet_index.timestamp_end;
1836 viewer_index.events_discarded = packet_index.events_discarded;
1837 viewer_index.stream_id = packet_index.stream_id;
1838
1839 send_reply:
1840 if (rstream) {
1841 pthread_mutex_unlock(&rstream->lock);
1842 }
1843
1844 if (metadata_viewer_stream) {
1845 pthread_mutex_lock(&metadata_viewer_stream->stream->lock);
1846 DBG("get next index metadata check: recv %" PRIu64
1847 " sent %" PRIu64,
1848 metadata_viewer_stream->stream->metadata_received,
1849 metadata_viewer_stream->metadata_sent);
1850 if (!metadata_viewer_stream->stream->metadata_received ||
1851 metadata_viewer_stream->stream->metadata_received >
1852 metadata_viewer_stream->metadata_sent) {
1853 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1854 }
1855 pthread_mutex_unlock(&metadata_viewer_stream->stream->lock);
1856 }
1857
1858 viewer_index.flags = htobe32(viewer_index.flags);
1859 health_code_update();
1860
1861 ret = send_response(conn->sock, &viewer_index, sizeof(viewer_index));
1862 if (ret < 0) {
1863 goto end;
1864 }
1865 health_code_update();
1866
1867 if (vstream) {
1868 DBG("Index %" PRIu64 " for stream %" PRIu64 " sent",
1869 vstream->index_sent_seqcount,
1870 vstream->stream->stream_handle);
1871 }
1872 end:
1873 if (metadata_viewer_stream) {
1874 viewer_stream_put(metadata_viewer_stream);
1875 }
1876 if (vstream) {
1877 viewer_stream_put(vstream);
1878 }
1879 return ret;
1880
1881 error_put:
1882 pthread_mutex_unlock(&rstream->lock);
1883 if (metadata_viewer_stream) {
1884 viewer_stream_put(metadata_viewer_stream);
1885 }
1886 viewer_stream_put(vstream);
1887 return ret;
1888 }
1889
1890 /*
1891 * Send the next index for a stream
1892 *
1893 * Return 0 on success or else a negative value.
1894 */
1895 static
1896 int viewer_get_packet(struct relay_connection *conn)
1897 {
1898 int ret;
1899 off_t lseek_ret;
1900 char *reply = NULL;
1901 struct lttng_viewer_get_packet get_packet_info;
1902 struct lttng_viewer_trace_packet reply_header;
1903 struct relay_viewer_stream *vstream = NULL;
1904 uint32_t reply_size = sizeof(reply_header);
1905 uint32_t packet_data_len = 0;
1906 ssize_t read_len;
1907 uint64_t stream_id;
1908
1909 DBG2("Relay get data packet");
1910
1911 health_code_update();
1912
1913 ret = recv_request(conn->sock, &get_packet_info,
1914 sizeof(get_packet_info));
1915 if (ret < 0) {
1916 goto end;
1917 }
1918 health_code_update();
1919
1920 /* From this point on, the error label can be reached. */
1921 memset(&reply_header, 0, sizeof(reply_header));
1922 stream_id = (uint64_t) be64toh(get_packet_info.stream_id);
1923
1924 vstream = viewer_stream_get_by_id(stream_id);
1925 if (!vstream) {
1926 DBG("Client requested packet of unknown stream id %" PRIu64,
1927 stream_id);
1928 reply_header.status = htobe32(LTTNG_VIEWER_GET_PACKET_ERR);
1929 goto send_reply_nolock;
1930 } else {
1931 packet_data_len = be32toh(get_packet_info.len);
1932 reply_size += packet_data_len;
1933 }
1934
1935 reply = (char *) zmalloc(reply_size);
1936 if (!reply) {
1937 PERROR("packet reply zmalloc");
1938 reply_size = sizeof(reply_header);
1939 goto error;
1940 }
1941
1942 pthread_mutex_lock(&vstream->stream->lock);
1943 lseek_ret = fs_handle_seek(vstream->stream_file.handle,
1944 be64toh(get_packet_info.offset), SEEK_SET);
1945 if (lseek_ret < 0) {
1946 PERROR("Failed to seek file system handle of viewer stream %" PRIu64
1947 " to offset %" PRIu64,
1948 stream_id,
1949 (uint64_t) be64toh(get_packet_info.offset));
1950 goto error;
1951 }
1952 read_len = fs_handle_read(vstream->stream_file.handle,
1953 reply + sizeof(reply_header), packet_data_len);
1954 if (read_len < packet_data_len) {
1955 PERROR("Failed to read from file system handle of viewer stream id %" PRIu64
1956 ", offset: %" PRIu64,
1957 stream_id,
1958 (uint64_t) be64toh(get_packet_info.offset));
1959 goto error;
1960 }
1961 reply_header.status = htobe32(LTTNG_VIEWER_GET_PACKET_OK);
1962 reply_header.len = htobe32(packet_data_len);
1963 goto send_reply;
1964
1965 error:
1966 /* No payload to send on error. */
1967 reply_size = sizeof(reply_header);
1968 reply_header.status = htobe32(LTTNG_VIEWER_GET_PACKET_ERR);
1969
1970 send_reply:
1971 if (vstream) {
1972 pthread_mutex_unlock(&vstream->stream->lock);
1973 }
1974 send_reply_nolock:
1975
1976 health_code_update();
1977
1978 if (reply) {
1979 memcpy(reply, &reply_header, sizeof(reply_header));
1980 ret = send_response(conn->sock, reply, reply_size);
1981 } else {
1982 /* No reply to send. */
1983 ret = send_response(conn->sock, &reply_header,
1984 reply_size);
1985 }
1986
1987 health_code_update();
1988 if (ret < 0) {
1989 PERROR("sendmsg of packet data failed");
1990 goto end_free;
1991 }
1992
1993 DBG("Sent %u bytes for stream %" PRIu64, reply_size, stream_id);
1994
1995 end_free:
1996 free(reply);
1997 end:
1998 if (vstream) {
1999 viewer_stream_put(vstream);
2000 }
2001 return ret;
2002 }
2003
2004 /*
2005 * Send the session's metadata
2006 *
2007 * Return 0 on success else a negative value.
2008 */
2009 static
2010 int viewer_get_metadata(struct relay_connection *conn)
2011 {
2012 int ret = 0;
2013 int fd = -1;
2014 ssize_t read_len;
2015 uint64_t len = 0;
2016 char *data = NULL;
2017 struct lttng_viewer_get_metadata request;
2018 struct lttng_viewer_metadata_packet reply;
2019 struct relay_viewer_stream *vstream = NULL;
2020
2021 LTTNG_ASSERT(conn);
2022
2023 DBG("Relay get metadata");
2024
2025 health_code_update();
2026
2027 ret = recv_request(conn->sock, &request, sizeof(request));
2028 if (ret < 0) {
2029 goto end;
2030 }
2031 health_code_update();
2032
2033 memset(&reply, 0, sizeof(reply));
2034
2035 vstream = viewer_stream_get_by_id(be64toh(request.stream_id));
2036 if (!vstream) {
2037 /*
2038 * The metadata stream can be closed by a CLOSE command
2039 * just before we attach. It can also be closed by
2040 * per-pid tracing during tracing. Therefore, it is
2041 * possible that we cannot find this viewer stream.
2042 * Reply back to the client with an error if we cannot
2043 * find it.
2044 */
2045 DBG("Client requested metadata of unknown stream id %" PRIu64,
2046 (uint64_t) be64toh(request.stream_id));
2047 reply.status = htobe32(LTTNG_VIEWER_METADATA_ERR);
2048 goto send_reply;
2049 }
2050 pthread_mutex_lock(&vstream->stream->lock);
2051 if (!vstream->stream->is_metadata) {
2052 ERR("Invalid metadata stream");
2053 goto error;
2054 }
2055
2056 if (vstream->metadata_sent >= vstream->stream->metadata_received) {
2057 /*
2058 * The live viewers expect to receive a NO_NEW_METADATA
2059 * status before a stream disappears, otherwise they abort the
2060 * entire live connection when receiving an error status.
2061 *
2062 * Clear feature resets the metadata_sent to 0 until the
2063 * same metadata is received again.
2064 */
2065 reply.status = htobe32(LTTNG_VIEWER_NO_NEW_METADATA);
2066 /*
2067 * The live viewer considers a closed 0 byte metadata stream as
2068 * an error.
2069 */
2070 if (vstream->metadata_sent > 0) {
2071 if (vstream->stream->closed && vstream->stream->no_new_metadata_notified) {
2072 /* Release ownership for the viewer metadata stream. */
2073 viewer_stream_put(vstream);
2074 }
2075 vstream->stream->no_new_metadata_notified = true;
2076 }
2077 goto send_reply;
2078 }
2079
2080 if (vstream->stream->trace_chunk &&
2081 !lttng_trace_chunk_ids_equal(
2082 conn->viewer_session->current_trace_chunk,
2083 vstream->stream->trace_chunk)) {
2084 /* A rotation has occurred on the relay stream. */
2085 DBG("Metadata relay stream and viewer chunk ids differ");
2086
2087 ret = viewer_session_set_trace_chunk_copy(
2088 conn->viewer_session,
2089 vstream->stream->trace_chunk);
2090 if (ret) {
2091 reply.status = htobe32(LTTNG_VIEWER_METADATA_ERR);
2092 goto send_reply;
2093 }
2094 }
2095
2096 if (conn->viewer_session->current_trace_chunk &&
2097 !lttng_trace_chunk_ids_equal(conn->viewer_session->current_trace_chunk,
2098 vstream->stream_file.trace_chunk)) {
2099 bool acquired_reference;
2100
2101 DBG("Viewer session and viewer stream chunk differ: "
2102 "vsession chunk %p vstream chunk %p",
2103 conn->viewer_session->current_trace_chunk,
2104 vstream->stream_file.trace_chunk);
2105 lttng_trace_chunk_put(vstream->stream_file.trace_chunk);
2106 acquired_reference = lttng_trace_chunk_get(conn->viewer_session->current_trace_chunk);
2107 LTTNG_ASSERT(acquired_reference);
2108 vstream->stream_file.trace_chunk =
2109 conn->viewer_session->current_trace_chunk;
2110 viewer_stream_close_files(vstream);
2111 }
2112
2113 len = vstream->stream->metadata_received - vstream->metadata_sent;
2114
2115 if (!vstream->stream_file.trace_chunk) {
2116 reply.status = htobe32(LTTNG_VIEWER_NO_NEW_METADATA);
2117 len = 0;
2118 goto send_reply;
2119 } else if (vstream->stream_file.trace_chunk &&
2120 !vstream->stream_file.handle && len > 0) {
2121 /*
2122 * Either this is the first time the metadata file is read, or a
2123 * rotation of the corresponding relay stream has occurred.
2124 */
2125 struct fs_handle *fs_handle;
2126 char file_path[LTTNG_PATH_MAX];
2127 enum lttng_trace_chunk_status status;
2128 struct relay_stream *rstream = vstream->stream;
2129
2130 ret = utils_stream_file_path(rstream->path_name,
2131 rstream->channel_name, rstream->tracefile_size,
2132 vstream->current_tracefile_id, NULL, file_path,
2133 sizeof(file_path));
2134 if (ret < 0) {
2135 goto error;
2136 }
2137
2138 /*
2139 * It is possible the the metadata file we are trying to open is
2140 * missing if the stream has been closed (application exits with
2141 * per-pid buffers) and a clear command has been performed.
2142 */
2143 status = lttng_trace_chunk_open_fs_handle(
2144 vstream->stream_file.trace_chunk,
2145 file_path, O_RDONLY, 0, &fs_handle, true);
2146 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
2147 if (status == LTTNG_TRACE_CHUNK_STATUS_NO_FILE) {
2148 reply.status = htobe32(LTTNG_VIEWER_NO_NEW_METADATA);
2149 len = 0;
2150 if (vstream->stream->closed) {
2151 viewer_stream_put(vstream);
2152 }
2153 goto send_reply;
2154 }
2155 PERROR("Failed to open metadata file for viewer stream");
2156 goto error;
2157 }
2158 vstream->stream_file.handle = fs_handle;
2159
2160 if (vstream->metadata_sent != 0) {
2161 /*
2162 * The client does not expect to receive any metadata
2163 * it has received and metadata files in successive
2164 * chunks must be a strict superset of one another.
2165 *
2166 * Skip the first `metadata_sent` bytes to ensure
2167 * they are not sent a second time to the client.
2168 *
2169 * Baring a block layer error or an internal error,
2170 * this seek should not fail as
2171 * `vstream->stream->metadata_received` is reset when
2172 * a relay stream is rotated. If this is reached, it is
2173 * safe to assume that
2174 * `metadata_received` > `metadata_sent`.
2175 */
2176 const off_t seek_ret = fs_handle_seek(fs_handle,
2177 vstream->metadata_sent, SEEK_SET);
2178
2179 if (seek_ret < 0) {
2180 PERROR("Failed to seek metadata viewer stream file to `sent` position: pos = %" PRId64,
2181 vstream->metadata_sent);
2182 reply.status = htobe32(LTTNG_VIEWER_METADATA_ERR);
2183 goto send_reply;
2184 }
2185 }
2186 }
2187
2188 reply.len = htobe64(len);
2189 data = (char *) zmalloc(len);
2190 if (!data) {
2191 PERROR("viewer metadata zmalloc");
2192 goto error;
2193 }
2194
2195 fd = fs_handle_get_fd(vstream->stream_file.handle);
2196 if (fd < 0) {
2197 ERR("Failed to restore viewer stream file system handle");
2198 goto error;
2199 }
2200 read_len = lttng_read(fd, data, len);
2201 fs_handle_put_fd(vstream->stream_file.handle);
2202 fd = -1;
2203 if (read_len < len) {
2204 if (read_len < 0) {
2205 PERROR("Failed to read metadata file");
2206 goto error;
2207 } else {
2208 /*
2209 * A clear has been performed which prevents the relay
2210 * from sending `len` bytes of metadata.
2211 *
2212 * It is important not to send any metadata if we
2213 * couldn't read all the available metadata in one shot:
2214 * sending partial metadata can cause the client to
2215 * attempt to parse an incomplete (incoherent) metadata
2216 * stream, which would result in an error.
2217 */
2218 const off_t seek_ret = fs_handle_seek(
2219 vstream->stream_file.handle, -read_len,
2220 SEEK_CUR);
2221
2222 DBG("Failed to read metadata: requested = %" PRIu64 ", got = %zd",
2223 len, read_len);
2224 read_len = 0;
2225 len = 0;
2226 if (seek_ret < 0) {
2227 PERROR("Failed to restore metadata file position after partial read");
2228 ret = -1;
2229 goto error;
2230 }
2231 }
2232 }
2233 vstream->metadata_sent += read_len;
2234 reply.status = htobe32(LTTNG_VIEWER_METADATA_OK);
2235
2236 goto send_reply;
2237
2238 error:
2239 reply.status = htobe32(LTTNG_VIEWER_METADATA_ERR);
2240
2241 send_reply:
2242 health_code_update();
2243 if (vstream) {
2244 pthread_mutex_unlock(&vstream->stream->lock);
2245 }
2246 ret = send_response(conn->sock, &reply, sizeof(reply));
2247 if (ret < 0) {
2248 goto end_free;
2249 }
2250 health_code_update();
2251
2252 if (len > 0) {
2253 ret = send_response(conn->sock, data, len);
2254 if (ret < 0) {
2255 goto end_free;
2256 }
2257 }
2258
2259 DBG("Sent %" PRIu64 " bytes of metadata for stream %" PRIu64, len,
2260 (uint64_t) be64toh(request.stream_id));
2261
2262 DBG("Metadata sent");
2263
2264 end_free:
2265 free(data);
2266 end:
2267 if (vstream) {
2268 viewer_stream_put(vstream);
2269 }
2270 return ret;
2271 }
2272
2273 /*
2274 * Create a viewer session.
2275 *
2276 * Return 0 on success or else a negative value.
2277 */
2278 static
2279 int viewer_create_session(struct relay_connection *conn)
2280 {
2281 int ret;
2282 struct lttng_viewer_create_session_response resp;
2283
2284 DBG("Viewer create session received");
2285
2286 memset(&resp, 0, sizeof(resp));
2287 resp.status = htobe32(LTTNG_VIEWER_CREATE_SESSION_OK);
2288 conn->viewer_session = viewer_session_create();
2289 if (!conn->viewer_session) {
2290 ERR("Allocation viewer session");
2291 resp.status = htobe32(LTTNG_VIEWER_CREATE_SESSION_ERR);
2292 goto send_reply;
2293 }
2294
2295 send_reply:
2296 health_code_update();
2297 ret = send_response(conn->sock, &resp, sizeof(resp));
2298 if (ret < 0) {
2299 goto end;
2300 }
2301 health_code_update();
2302 ret = 0;
2303
2304 end:
2305 return ret;
2306 }
2307
2308 /*
2309 * Detach a viewer session.
2310 *
2311 * Return 0 on success or else a negative value.
2312 */
2313 static
2314 int viewer_detach_session(struct relay_connection *conn)
2315 {
2316 int ret;
2317 struct lttng_viewer_detach_session_response response;
2318 struct lttng_viewer_detach_session_request request;
2319 struct relay_session *session = NULL;
2320 uint64_t viewer_session_to_close;
2321
2322 DBG("Viewer detach session received");
2323
2324 LTTNG_ASSERT(conn);
2325
2326 health_code_update();
2327
2328 /* Receive the request from the connected client. */
2329 ret = recv_request(conn->sock, &request, sizeof(request));
2330 if (ret < 0) {
2331 goto end;
2332 }
2333 viewer_session_to_close = be64toh(request.session_id);
2334
2335 if (!conn->viewer_session) {
2336 DBG("Client trying to detach before creating a live viewer session");
2337 response.status = htobe32(LTTNG_VIEWER_DETACH_SESSION_ERR);
2338 goto send_reply;
2339 }
2340
2341 health_code_update();
2342
2343 memset(&response, 0, sizeof(response));
2344 DBG("Detaching from session ID %" PRIu64, viewer_session_to_close);
2345
2346 session = session_get_by_id(be64toh(request.session_id));
2347 if (!session) {
2348 DBG("Relay session %" PRIu64 " not found",
2349 (uint64_t) be64toh(request.session_id));
2350 response.status = htobe32(LTTNG_VIEWER_DETACH_SESSION_UNK);
2351 goto send_reply;
2352 }
2353
2354 ret = viewer_session_is_attached(conn->viewer_session, session);
2355 if (ret != 1) {
2356 DBG("Not attached to this session");
2357 response.status = htobe32(LTTNG_VIEWER_DETACH_SESSION_ERR);
2358 goto send_reply_put;
2359 }
2360
2361 viewer_session_close_one_session(conn->viewer_session, session);
2362 response.status = htobe32(LTTNG_VIEWER_DETACH_SESSION_OK);
2363 DBG("Session %" PRIu64 " detached.", viewer_session_to_close);
2364
2365 send_reply_put:
2366 session_put(session);
2367
2368 send_reply:
2369 health_code_update();
2370 ret = send_response(conn->sock, &response, sizeof(response));
2371 if (ret < 0) {
2372 goto end;
2373 }
2374 health_code_update();
2375 ret = 0;
2376
2377 end:
2378 return ret;
2379 }
2380
2381 /*
2382 * live_relay_unknown_command: send -1 if received unknown command
2383 */
2384 static
2385 void live_relay_unknown_command(struct relay_connection *conn)
2386 {
2387 struct lttcomm_relayd_generic_reply reply;
2388
2389 memset(&reply, 0, sizeof(reply));
2390 reply.ret_code = htobe32(LTTNG_ERR_UNK);
2391 (void) send_response(conn->sock, &reply, sizeof(reply));
2392 }
2393
2394 /*
2395 * Process the commands received on the control socket
2396 */
2397 static
2398 int process_control(struct lttng_viewer_cmd *recv_hdr,
2399 struct relay_connection *conn)
2400 {
2401 int ret = 0;
2402 uint32_t msg_value;
2403
2404 msg_value = be32toh(recv_hdr->cmd);
2405
2406 /*
2407 * Make sure we've done the version check before any command other then a
2408 * new client connection.
2409 */
2410 if (msg_value != LTTNG_VIEWER_CONNECT && !conn->version_check_done) {
2411 ERR("Viewer conn value %" PRIu32 " before version check", msg_value);
2412 ret = -1;
2413 goto end;
2414 }
2415
2416 switch (msg_value) {
2417 case LTTNG_VIEWER_CONNECT:
2418 ret = viewer_connect(conn);
2419 break;
2420 case LTTNG_VIEWER_LIST_SESSIONS:
2421 ret = viewer_list_sessions(conn);
2422 break;
2423 case LTTNG_VIEWER_ATTACH_SESSION:
2424 ret = viewer_attach_session(conn);
2425 break;
2426 case LTTNG_VIEWER_GET_NEXT_INDEX:
2427 ret = viewer_get_next_index(conn);
2428 break;
2429 case LTTNG_VIEWER_GET_PACKET:
2430 ret = viewer_get_packet(conn);
2431 break;
2432 case LTTNG_VIEWER_GET_METADATA:
2433 ret = viewer_get_metadata(conn);
2434 break;
2435 case LTTNG_VIEWER_GET_NEW_STREAMS:
2436 ret = viewer_get_new_streams(conn);
2437 break;
2438 case LTTNG_VIEWER_CREATE_SESSION:
2439 ret = viewer_create_session(conn);
2440 break;
2441 case LTTNG_VIEWER_DETACH_SESSION:
2442 ret = viewer_detach_session(conn);
2443 break;
2444 default:
2445 ERR("Received unknown viewer command (%u)",
2446 be32toh(recv_hdr->cmd));
2447 live_relay_unknown_command(conn);
2448 ret = -1;
2449 goto end;
2450 }
2451
2452 end:
2453 return ret;
2454 }
2455
2456 static
2457 void cleanup_connection_pollfd(struct lttng_poll_event *events, int pollfd)
2458 {
2459 int ret;
2460
2461 (void) lttng_poll_del(events, pollfd);
2462
2463 ret = fd_tracker_close_unsuspendable_fd(the_fd_tracker, &pollfd, 1,
2464 fd_tracker_util_close_fd, NULL);
2465 if (ret < 0) {
2466 ERR("Closing pollfd %d", pollfd);
2467 }
2468 }
2469
2470 /*
2471 * This thread does the actual work
2472 */
2473 static
2474 void *thread_worker(void *data)
2475 {
2476 int ret, err = -1;
2477 uint32_t nb_fd;
2478 struct lttng_poll_event events;
2479 struct lttng_ht *viewer_connections_ht;
2480 struct lttng_ht_iter iter;
2481 struct lttng_viewer_cmd recv_hdr;
2482 struct relay_connection *destroy_conn;
2483
2484 DBG("[thread] Live viewer relay worker started");
2485
2486 rcu_register_thread();
2487
2488 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);
2489
2490 if (testpoint(relayd_thread_live_worker)) {
2491 goto error_testpoint;
2492 }
2493
2494 /* table of connections indexed on socket */
2495 viewer_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
2496 if (!viewer_connections_ht) {
2497 goto viewer_connections_ht_error;
2498 }
2499
2500 ret = create_named_thread_poll_set(&events, 2,
2501 "Live viewer worker thread epoll");
2502 if (ret < 0) {
2503 goto error_poll_create;
2504 }
2505
2506 ret = lttng_poll_add(&events, live_conn_pipe[0], LPOLLIN | LPOLLRDHUP);
2507 if (ret < 0) {
2508 goto error;
2509 }
2510
2511 restart:
2512 while (1) {
2513 int i;
2514
2515 health_code_update();
2516
2517 /* Infinite blocking call, waiting for transmission */
2518 DBG3("Relayd live viewer worker thread polling...");
2519 health_poll_entry();
2520 ret = lttng_poll_wait(&events, -1);
2521 health_poll_exit();
2522 if (ret < 0) {
2523 /*
2524 * Restart interrupted system call.
2525 */
2526 if (errno == EINTR) {
2527 goto restart;
2528 }
2529 goto error;
2530 }
2531
2532 nb_fd = ret;
2533
2534 /*
2535 * Process control. The control connection is prioritised so we don't
2536 * starve it with high throughput tracing data on the data
2537 * connection.
2538 */
2539 for (i = 0; i < nb_fd; i++) {
2540 /* Fetch once the poll data */
2541 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
2542 int pollfd = LTTNG_POLL_GETFD(&events, i);
2543
2544 health_code_update();
2545
2546 /* Thread quit pipe has been closed. Killing thread. */
2547 ret = check_thread_quit_pipe(pollfd, revents);
2548 if (ret) {
2549 err = 0;
2550 goto exit;
2551 }
2552
2553 /* Inspect the relay conn pipe for new connection. */
2554 if (pollfd == live_conn_pipe[0]) {
2555 if (revents & LPOLLIN) {
2556 struct relay_connection *conn;
2557
2558 ret = lttng_read(live_conn_pipe[0],
2559 &conn, sizeof(conn));
2560 if (ret < 0) {
2561 goto error;
2562 }
2563 ret = lttng_poll_add(&events,
2564 conn->sock->fd,
2565 LPOLLIN | LPOLLRDHUP);
2566 if (ret) {
2567 ERR("Failed to add new live connection file descriptor to poll set");
2568 goto error;
2569 }
2570 connection_ht_add(viewer_connections_ht, conn);
2571 DBG("Connection socket %d added to poll", conn->sock->fd);
2572 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
2573 ERR("Relay live pipe error");
2574 goto error;
2575 } else {
2576 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
2577 goto error;
2578 }
2579 } else {
2580 /* Connection activity. */
2581 struct relay_connection *conn;
2582
2583 conn = connection_get_by_sock(viewer_connections_ht, pollfd);
2584 if (!conn) {
2585 continue;
2586 }
2587
2588 if (revents & LPOLLIN) {
2589 ret = conn->sock->ops->recvmsg(conn->sock, &recv_hdr,
2590 sizeof(recv_hdr), 0);
2591 if (ret <= 0) {
2592 /* Connection closed. */
2593 cleanup_connection_pollfd(&events, pollfd);
2594 /* Put "create" ownership reference. */
2595 connection_put(conn);
2596 DBG("Viewer control conn closed with %d", pollfd);
2597 } else {
2598 ret = process_control(&recv_hdr, conn);
2599 if (ret < 0) {
2600 /* Clear the session on error. */
2601 cleanup_connection_pollfd(&events, pollfd);
2602 /* Put "create" ownership reference. */
2603 connection_put(conn);
2604 DBG("Viewer connection closed with %d", pollfd);
2605 }
2606 }
2607 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
2608 cleanup_connection_pollfd(&events, pollfd);
2609 /* Put "create" ownership reference. */
2610 connection_put(conn);
2611 } else {
2612 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
2613 connection_put(conn);
2614 goto error;
2615 }
2616 /* Put local "get_by_sock" reference. */
2617 connection_put(conn);
2618 }
2619 }
2620 }
2621
2622 exit:
2623 error:
2624 (void) fd_tracker_util_poll_clean(the_fd_tracker, &events);
2625
2626 /* Cleanup remaining connection object. */
2627 rcu_read_lock();
2628 cds_lfht_for_each_entry(viewer_connections_ht->ht, &iter.iter,
2629 destroy_conn,
2630 sock_n.node) {
2631 health_code_update();
2632 connection_put(destroy_conn);
2633 }
2634 rcu_read_unlock();
2635 error_poll_create:
2636 lttng_ht_destroy(viewer_connections_ht);
2637 viewer_connections_ht_error:
2638 /* Close relay conn pipes */
2639 (void) fd_tracker_util_pipe_close(the_fd_tracker, live_conn_pipe);
2640 if (err) {
2641 DBG("Viewer worker thread exited with error");
2642 }
2643 DBG("Viewer worker thread cleanup complete");
2644 error_testpoint:
2645 if (err) {
2646 health_error();
2647 ERR("Health error occurred in %s", __func__);
2648 }
2649 health_unregister(health_relayd);
2650 if (lttng_relay_stop_threads()) {
2651 ERR("Error stopping threads");
2652 }
2653 rcu_unregister_thread();
2654 return NULL;
2655 }
2656
2657 /*
2658 * Create the relay command pipe to wake thread_manage_apps.
2659 * Closed in cleanup().
2660 */
2661 static int create_conn_pipe(void)
2662 {
2663 return fd_tracker_util_pipe_open_cloexec(the_fd_tracker,
2664 "Live connection pipe", live_conn_pipe);
2665 }
2666
2667 int relayd_live_join(void)
2668 {
2669 int ret, retval = 0;
2670 void *status;
2671
2672 ret = pthread_join(live_listener_thread, &status);
2673 if (ret) {
2674 errno = ret;
2675 PERROR("pthread_join live listener");
2676 retval = -1;
2677 }
2678
2679 ret = pthread_join(live_worker_thread, &status);
2680 if (ret) {
2681 errno = ret;
2682 PERROR("pthread_join live worker");
2683 retval = -1;
2684 }
2685
2686 ret = pthread_join(live_dispatcher_thread, &status);
2687 if (ret) {
2688 errno = ret;
2689 PERROR("pthread_join live dispatcher");
2690 retval = -1;
2691 }
2692
2693 cleanup_relayd_live();
2694
2695 return retval;
2696 }
2697
2698 /*
2699 * main
2700 */
2701 int relayd_live_create(struct lttng_uri *uri)
2702 {
2703 int ret = 0, retval = 0;
2704 void *status;
2705 int is_root;
2706
2707 if (!uri) {
2708 retval = -1;
2709 goto exit_init_data;
2710 }
2711 live_uri = uri;
2712
2713 /* Check if daemon is UID = 0 */
2714 is_root = !getuid();
2715
2716 if (!is_root) {
2717 if (live_uri->port < 1024) {
2718 ERR("Need to be root to use ports < 1024");
2719 retval = -1;
2720 goto exit_init_data;
2721 }
2722 }
2723
2724 /* Setup the thread apps communication pipe. */
2725 if (create_conn_pipe()) {
2726 retval = -1;
2727 goto exit_init_data;
2728 }
2729
2730 /* Init relay command queue. */
2731 cds_wfcq_init(&viewer_conn_queue.head, &viewer_conn_queue.tail);
2732
2733 /* Set up max poll set size */
2734 if (lttng_poll_set_max_size()) {
2735 retval = -1;
2736 goto exit_init_data;
2737 }
2738
2739 /* Setup the dispatcher thread */
2740 ret = pthread_create(&live_dispatcher_thread, default_pthread_attr(),
2741 thread_dispatcher, (void *) NULL);
2742 if (ret) {
2743 errno = ret;
2744 PERROR("pthread_create viewer dispatcher");
2745 retval = -1;
2746 goto exit_dispatcher_thread;
2747 }
2748
2749 /* Setup the worker thread */
2750 ret = pthread_create(&live_worker_thread, default_pthread_attr(),
2751 thread_worker, NULL);
2752 if (ret) {
2753 errno = ret;
2754 PERROR("pthread_create viewer worker");
2755 retval = -1;
2756 goto exit_worker_thread;
2757 }
2758
2759 /* Setup the listener thread */
2760 ret = pthread_create(&live_listener_thread, default_pthread_attr(),
2761 thread_listener, (void *) NULL);
2762 if (ret) {
2763 errno = ret;
2764 PERROR("pthread_create viewer listener");
2765 retval = -1;
2766 goto exit_listener_thread;
2767 }
2768
2769 /*
2770 * All OK, started all threads.
2771 */
2772 return retval;
2773
2774 /*
2775 * Join on the live_listener_thread should anything be added after
2776 * the live_listener thread's creation.
2777 */
2778
2779 exit_listener_thread:
2780
2781 ret = pthread_join(live_worker_thread, &status);
2782 if (ret) {
2783 errno = ret;
2784 PERROR("pthread_join live worker");
2785 retval = -1;
2786 }
2787 exit_worker_thread:
2788
2789 ret = pthread_join(live_dispatcher_thread, &status);
2790 if (ret) {
2791 errno = ret;
2792 PERROR("pthread_join live dispatcher");
2793 retval = -1;
2794 }
2795 exit_dispatcher_thread:
2796
2797 exit_init_data:
2798 cleanup_relayd_live();
2799
2800 return retval;
2801 }
This page took 0.134151 seconds and 4 git commands to generate.