Fix: big relayd cleanup and refactor
[lttng-tools.git] / src / bin / lttng-relayd / live.c
CommitLineData
d3e2ba59
JD
1/*
2 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
3 * David Goulet <dgoulet@efficios.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License, version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19#define _GNU_SOURCE
20#include <getopt.h>
21#include <grp.h>
22#include <limits.h>
23#include <pthread.h>
24#include <signal.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <sys/mman.h>
29#include <sys/mount.h>
30#include <sys/resource.h>
31#include <sys/socket.h>
32#include <sys/stat.h>
33#include <sys/types.h>
34#include <sys/wait.h>
35#include <inttypes.h>
36#include <urcu/futex.h>
37#include <urcu/uatomic.h>
38#include <unistd.h>
39#include <fcntl.h>
40#include <config.h>
41
42#include <lttng/lttng.h>
43#include <common/common.h>
44#include <common/compat/poll.h>
45#include <common/compat/socket.h>
46#include <common/defaults.h>
47#include <common/futex.h>
2f8f53af 48#include <common/index/index.h>
d3e2ba59
JD
49#include <common/sessiond-comm/sessiond-comm.h>
50#include <common/sessiond-comm/inet.h>
51#include <common/sessiond-comm/relayd.h>
52#include <common/uri.h>
53#include <common/utils.h>
54
55#include "cmd.h"
56#include "live.h"
57#include "lttng-relayd.h"
d3e2ba59 58#include "utils.h"
eea7556c 59#include "health-relayd.h"
9b5e0863 60#include "testpoint.h"
2f8f53af 61#include "viewer-stream.h"
2a174661
DG
62#include "stream.h"
63#include "session.h"
64#include "ctf-trace.h"
d3e2ba59
JD
65
66static struct lttng_uri *live_uri;
67
d3e2ba59
JD
68/*
69 * This pipe is used to inform the worker thread that a command is queued and
70 * ready to be processed.
71 */
72static int live_relay_cmd_pipe[2] = { -1, -1 };
73
74/* Shared between threads */
75static int live_dispatch_thread_exit;
76
77static pthread_t live_listener_thread;
78static pthread_t live_dispatcher_thread;
79static pthread_t live_worker_thread;
80
81/*
82 * Relay command queue.
83 *
84 * The live_thread_listener and live_thread_dispatcher communicate with this
85 * queue.
86 */
87static struct relay_cmd_queue viewer_cmd_queue;
88
89static uint64_t last_relay_viewer_session_id;
90
91/*
92 * Cleanup the daemon
93 */
94static
95void cleanup(void)
96{
97 DBG("Cleaning up");
98
d3e2ba59
JD
99 free(live_uri);
100}
101
2f8f53af
DG
102/*
103 * Receive a request buffer using a given socket, destination allocated buffer
104 * of length size.
105 *
106 * Return the size of the received message or else a negative value on error
107 * with errno being set by recvmsg() syscall.
108 */
109static
110ssize_t recv_request(struct lttcomm_sock *sock, void *buf, size_t size)
111{
112 ssize_t ret;
113
114 assert(sock);
115 assert(buf);
116
117 ret = sock->ops->recvmsg(sock, buf, size, 0);
118 if (ret < 0 || ret != size) {
119 if (ret == 0) {
120 /* Orderly shutdown. Not necessary to print an error. */
121 DBG("Socket %d did an orderly shutdown", sock->fd);
122 } else {
123 ERR("Relay failed to receive request.");
124 }
125 ret = -1;
126 }
127
128 return ret;
129}
130
131/*
132 * Send a response buffer using a given socket, source allocated buffer of
133 * length size.
134 *
135 * Return the size of the sent message or else a negative value on error with
136 * errno being set by sendmsg() syscall.
137 */
138static
139ssize_t send_response(struct lttcomm_sock *sock, void *buf, size_t size)
140{
141 ssize_t ret;
142
143 assert(sock);
144 assert(buf);
145
146 ret = sock->ops->sendmsg(sock, buf, size, 0);
147 if (ret < 0) {
148 ERR("Relayd failed to send response.");
149 }
150
151 return ret;
152}
153
154/*
155 * Atomically check if new streams got added in the session since the last
156 * check and reset the flag to 0.
157 *
158 * Returns 1 if new streams got added, 0 if nothing changed, a negative value
159 * on error.
160 */
161static
162int check_new_streams(uint64_t session_id, struct lttng_ht *sessions_ht)
163{
164 int ret;
165 unsigned long current_val;
166 struct relay_session *session;
167
168 assert(sessions_ht);
169
170 session = session_find_by_id(sessions_ht, session_id);
171 if (!session) {
172 DBG("Relay session %" PRIu64 " not found", session_id);
173 ret = -1;
174 goto error;
175 }
176
177 current_val = uatomic_cmpxchg(&session->new_streams, 1, 0);
178 ret = current_val;
179
180error:
181 return ret;
182}
183
184/*
185 * Send viewer streams to the given socket. The ignore_sent_flag indicates if
186 * this function should ignore the sent flag or not.
187 *
188 * Return 0 on success or else a negative value.
189 */
190static
191ssize_t send_viewer_streams(struct lttcomm_sock *sock,
192 struct relay_session *session, unsigned int ignore_sent_flag)
193{
194 ssize_t ret;
195 struct lttng_viewer_stream send_stream;
196 struct lttng_ht_iter iter;
197 struct relay_viewer_stream *vstream;
198
199 assert(session);
200
201 rcu_read_lock();
202
203 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, vstream,
204 stream_n.node) {
2a174661
DG
205 struct ctf_trace *ctf_trace;
206
2f8f53af
DG
207 health_code_update();
208
209 /* Ignore if not the same session. */
210 if (vstream->session_id != session->id ||
211 (!ignore_sent_flag && vstream->sent_flag)) {
212 continue;
213 }
214
2a174661
DG
215 ctf_trace = ctf_trace_find_by_path(session->ctf_traces_ht,
216 vstream->path_name);
217 assert(ctf_trace);
218
2f8f53af 219 send_stream.id = htobe64(vstream->stream_handle);
2a174661 220 send_stream.ctf_trace_id = htobe64(ctf_trace->id);
2f8f53af
DG
221 send_stream.metadata_flag = htobe32(vstream->metadata_flag);
222 strncpy(send_stream.path_name, vstream->path_name,
223 sizeof(send_stream.path_name));
224 strncpy(send_stream.channel_name, vstream->channel_name,
225 sizeof(send_stream.channel_name));
226
227 DBG("Sending stream %" PRIu64 " to viewer", vstream->stream_handle);
228 ret = send_response(sock, &send_stream, sizeof(send_stream));
229 if (ret < 0) {
230 goto end_unlock;
231 }
232 vstream->sent_flag = 1;
233 }
234
235 ret = 0;
236
237end_unlock:
238 rcu_read_unlock();
239 return ret;
240}
241
242/*
243 * Create every viewer stream possible for the given session with the seek
244 * type. Three counters *can* be return which are in order the total amount of
245 * viewer stream of the session, the number of unsent stream and the number of
246 * stream created. Those counters can be NULL and thus will be ignored.
247 *
248 * Return 0 on success or else a negative value.
249 */
250static
251int make_viewer_streams(struct relay_session *session,
252 enum lttng_viewer_seek seek_t, uint32_t *nb_total, uint32_t *nb_unsent,
253 uint32_t *nb_created)
254{
255 int ret;
2f8f53af 256 struct lttng_ht_iter iter;
2a174661 257 struct ctf_trace *ctf_trace;
2f8f53af
DG
258
259 assert(session);
260
261 /*
262 * This is to make sure we create viewer streams for a full received
263 * channel. For instance, if we have 8 streams for a channel that are
264 * concurrently being flagged ready, we can end up creating just a subset
265 * of the 8 streams (the ones that are flagged). This lock avoids this
266 * limbo state.
267 */
268 pthread_mutex_lock(&session->viewer_ready_lock);
269
270 /*
271 * Create viewer streams for relay streams that are ready to be used for a
272 * the given session id only.
273 */
2a174661
DG
274 rcu_read_lock();
275 cds_lfht_for_each_entry(session->ctf_traces_ht->ht, &iter.iter, ctf_trace,
276 node.node) {
277 struct relay_stream *stream;
2f8f53af
DG
278
279 health_code_update();
280
2a174661 281 if (ctf_trace->invalid_flag) {
2f8f53af
DG
282 continue;
283 }
284
2a174661
DG
285 cds_list_for_each_entry(stream, &ctf_trace->stream_list, trace_list) {
286 struct relay_viewer_stream *vstream;
287
288 if (!stream->viewer_ready) {
289 continue;
290 }
291
292 vstream = viewer_stream_find_by_id(stream->stream_handle);
2f8f53af 293 if (!vstream) {
2a174661
DG
294 vstream = viewer_stream_create(stream, seek_t, ctf_trace);
295 if (!vstream) {
296 ret = -1;
297 goto error_unlock;
298 }
299 /* Acquire reference to ctf_trace. */
300 ctf_trace_get_ref(ctf_trace);
301
302 if (nb_created) {
303 /* Update number of created stream counter. */
304 (*nb_created)++;
305 }
306 } else if (!vstream->sent_flag && nb_unsent) {
307 /* Update number of unsent stream counter. */
308 (*nb_unsent)++;
2f8f53af 309 }
2a174661
DG
310 /* Update number of total stream counter. */
311 if (nb_total) {
312 (*nb_total)++;
2f8f53af 313 }
2f8f53af
DG
314 }
315 }
316
317 ret = 0;
318
319error_unlock:
2a174661 320 rcu_read_unlock();
2f8f53af
DG
321 pthread_mutex_unlock(&session->viewer_ready_lock);
322 return ret;
323}
324
d3e2ba59
JD
/*
 * Wake up the thread polling on the read side of a pipe by writing a single
 * byte to its write side `wpipe`.
 *
 * Return the result of the write (1 on success) cast to int; anything below 1
 * is reported through PERROR.
 */
static
int notify_thread_pipe(int wpipe)
{
	const ssize_t written = lttng_write(wpipe, "!", 1);

	if (written < 1) {
		PERROR("write poll pipe");
	}

	return (int) written;
}
340
341/*
342 * Stop all threads by closing the thread quit pipe.
343 */
344static
345void stop_threads(void)
346{
347 int ret;
348
349 /* Stopping all threads */
350 DBG("Terminating all live threads");
2a174661 351 ret = notify_thread_pipe(live_conn_pipe[1]);
d3e2ba59
JD
352 if (ret < 0) {
353 ERR("write error on thread quit pipe");
354 }
355
356 /* Dispatch thread */
357 CMM_STORE_SHARED(live_dispatch_thread_exit, 1);
358 futex_nto1_wake(&viewer_cmd_queue.futex);
359}
360
d3e2ba59
JD
361/*
362 * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
363 */
364static
365int create_thread_poll_set(struct lttng_poll_event *events, int size)
366{
367 int ret;
368
369 if (events == NULL || size == 0) {
370 ret = -1;
371 goto error;
372 }
373
374 ret = lttng_poll_create(events, size, LTTNG_CLOEXEC);
375 if (ret < 0) {
376 goto error;
377 }
378
379 /* Add quit pipe */
2a174661 380 ret = lttng_poll_add(events, live_conn_pipe[0], LPOLLIN | LPOLLERR);
d3e2ba59
JD
381 if (ret < 0) {
382 goto error;
383 }
384
385 return 0;
386
387error:
388 return ret;
389}
390
391/*
392 * Check if the thread quit pipe was triggered.
393 *
394 * Return 1 if it was triggered else 0;
395 */
396static
2a174661 397int check_live_conn_pipe(int fd, uint32_t events)
d3e2ba59 398{
2a174661 399 if (fd == live_conn_pipe[0] && (events & LPOLLIN)) {
d3e2ba59
JD
400 return 1;
401 }
402
403 return 0;
404}
405
406/*
407 * Create and init socket from uri.
408 */
409static
410struct lttcomm_sock *init_socket(struct lttng_uri *uri)
411{
412 int ret;
413 struct lttcomm_sock *sock = NULL;
414
415 sock = lttcomm_alloc_sock_from_uri(uri);
416 if (sock == NULL) {
417 ERR("Allocating socket");
418 goto error;
419 }
420
421 ret = lttcomm_create_sock(sock);
422 if (ret < 0) {
423 goto error;
424 }
425 DBG("Listening on sock %d for live", sock->fd);
426
427 ret = sock->ops->bind(sock);
428 if (ret < 0) {
429 goto error;
430 }
431
432 ret = sock->ops->listen(sock, -1);
433 if (ret < 0) {
434 goto error;
435
436 }
437
438 return sock;
439
440error:
441 if (sock) {
442 lttcomm_destroy_sock(sock);
443 }
444 return NULL;
445}
446
447/*
448 * This thread manages the listening for new connections on the network
449 */
450static
451void *thread_listener(void *data)
452{
453 int i, ret, pollfd, err = -1;
454 int val = 1;
455 uint32_t revents, nb_fd;
456 struct lttng_poll_event events;
457 struct lttcomm_sock *live_control_sock;
458
459 DBG("[thread] Relay live listener started");
460
eea7556c
MD
461 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);
462
463 health_code_update();
464
d3e2ba59
JD
465 live_control_sock = init_socket(live_uri);
466 if (!live_control_sock) {
467 goto error_sock_control;
468 }
469
fb4d42ab 470 /* Pass 2 as size here for the thread quit pipe and control sockets. */
d3e2ba59
JD
471 ret = create_thread_poll_set(&events, 2);
472 if (ret < 0) {
473 goto error_create_poll;
474 }
475
476 /* Add the control socket */
477 ret = lttng_poll_add(&events, live_control_sock->fd, LPOLLIN | LPOLLRDHUP);
478 if (ret < 0) {
479 goto error_poll_add;
480 }
481
3fd27398
MD
482 lttng_relay_notify_ready();
483
9b5e0863
MD
484 if (testpoint(relayd_thread_live_listener)) {
485 goto error_testpoint;
486 }
487
d3e2ba59 488 while (1) {
eea7556c
MD
489 health_code_update();
490
d3e2ba59
JD
491 DBG("Listener accepting live viewers connections");
492
493restart:
eea7556c 494 health_poll_entry();
d3e2ba59 495 ret = lttng_poll_wait(&events, -1);
eea7556c 496 health_poll_exit();
d3e2ba59
JD
497 if (ret < 0) {
498 /*
499 * Restart interrupted system call.
500 */
501 if (errno == EINTR) {
502 goto restart;
503 }
504 goto error;
505 }
506 nb_fd = ret;
507
508 DBG("Relay new viewer connection received");
509 for (i = 0; i < nb_fd; i++) {
eea7556c
MD
510 health_code_update();
511
d3e2ba59
JD
512 /* Fetch once the poll data */
513 revents = LTTNG_POLL_GETEV(&events, i);
514 pollfd = LTTNG_POLL_GETFD(&events, i);
515
516 /* Thread quit pipe has been closed. Killing thread. */
2a174661 517 ret = check_live_conn_pipe(pollfd, revents);
d3e2ba59
JD
518 if (ret) {
519 err = 0;
520 goto exit;
521 }
522
523 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
524 ERR("socket poll error");
525 goto error;
526 } else if (revents & LPOLLIN) {
527 /*
528 * Get allocated in this thread, enqueued to a global queue,
529 * dequeued and freed in the worker thread.
530 */
531 struct relay_command *relay_cmd;
532 struct lttcomm_sock *newsock;
533
534 relay_cmd = zmalloc(sizeof(*relay_cmd));
535 if (!relay_cmd) {
536 PERROR("relay command zmalloc");
537 goto error;
538 }
539
540 assert(pollfd == live_control_sock->fd);
541 newsock = live_control_sock->ops->accept(live_control_sock);
542 if (!newsock) {
543 PERROR("accepting control sock");
544 free(relay_cmd);
545 goto error;
546 }
547 DBG("Relay viewer connection accepted socket %d", newsock->fd);
548 ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val,
549 sizeof(int));
550 if (ret < 0) {
551 PERROR("setsockopt inet");
552 lttcomm_destroy_sock(newsock);
553 free(relay_cmd);
554 goto error;
555 }
556 relay_cmd->sock = newsock;
557
558 /*
559 * Lock free enqueue the request.
560 */
561 cds_wfq_enqueue(&viewer_cmd_queue.queue, &relay_cmd->node);
562
563 /*
564 * Wake the dispatch queue futex. Implicit memory
565 * barrier with the exchange in cds_wfq_enqueue.
566 */
567 futex_nto1_wake(&viewer_cmd_queue.futex);
568 }
569 }
570 }
571
572exit:
573error:
574error_poll_add:
9b5e0863 575error_testpoint:
d3e2ba59
JD
576 lttng_poll_clean(&events);
577error_create_poll:
578 if (live_control_sock->fd >= 0) {
579 ret = live_control_sock->ops->close(live_control_sock);
580 if (ret) {
581 PERROR("close");
582 }
583 }
584 lttcomm_destroy_sock(live_control_sock);
585error_sock_control:
586 if (err) {
eea7556c 587 health_error();
d3e2ba59
JD
588 DBG("Live viewer listener thread exited with error");
589 }
eea7556c 590 health_unregister(health_relayd);
d3e2ba59
JD
591 DBG("Live viewer listener thread cleanup complete");
592 stop_threads();
593 return NULL;
594}
595
596/*
597 * This thread manages the dispatching of the requests to worker threads
598 */
599static
600void *thread_dispatcher(void *data)
601{
6cd525e8
MD
602 int err = -1;
603 ssize_t ret;
d3e2ba59
JD
604 struct cds_wfq_node *node;
605 struct relay_command *relay_cmd = NULL;
606
607 DBG("[thread] Live viewer relay dispatcher started");
608
eea7556c
MD
609 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);
610
9b5e0863
MD
611 if (testpoint(relayd_thread_live_dispatcher)) {
612 goto error_testpoint;
613 }
614
eea7556c
MD
615 health_code_update();
616
d3e2ba59 617 while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
eea7556c
MD
618 health_code_update();
619
d3e2ba59
JD
620 /* Atomically prepare the queue futex */
621 futex_nto1_prepare(&viewer_cmd_queue.futex);
622
623 do {
eea7556c
MD
624 health_code_update();
625
d3e2ba59
JD
626 /* Dequeue commands */
627 node = cds_wfq_dequeue_blocking(&viewer_cmd_queue.queue);
628 if (node == NULL) {
629 DBG("Woken up but nothing in the live-viewer "
630 "relay command queue");
631 /* Continue thread execution */
632 break;
633 }
634
635 relay_cmd = caa_container_of(node, struct relay_command, node);
636 DBG("Dispatching viewer request waiting on sock %d",
637 relay_cmd->sock->fd);
638
639 /*
640 * Inform worker thread of the new request. This call is blocking
641 * so we can be assured that the data will be read at some point in
642 * time or wait to the end of the world :)
643 */
6cd525e8
MD
644 ret = lttng_write(live_relay_cmd_pipe[1], relay_cmd,
645 sizeof(*relay_cmd));
d3e2ba59 646 free(relay_cmd);
6cd525e8 647 if (ret < sizeof(struct relay_command)) {
d3e2ba59
JD
648 PERROR("write cmd pipe");
649 goto error;
650 }
651 } while (node != NULL);
652
653 /* Futex wait on queue. Blocking call on futex() */
eea7556c 654 health_poll_entry();
d3e2ba59 655 futex_nto1_wait(&viewer_cmd_queue.futex);
eea7556c 656 health_poll_exit();
d3e2ba59
JD
657 }
658
eea7556c
MD
659 /* Normal exit, no error */
660 err = 0;
661
d3e2ba59 662error:
9b5e0863 663error_testpoint:
eea7556c
MD
664 if (err) {
665 health_error();
666 ERR("Health error occurred in %s", __func__);
667 }
668 health_unregister(health_relayd);
d3e2ba59
JD
669 DBG("Live viewer dispatch thread dying");
670 stop_threads();
671 return NULL;
672}
673
674/*
675 * Establish connection with the viewer and check the versions.
676 *
677 * Return 0 on success or else negative value.
678 */
679static
680int viewer_connect(struct relay_command *cmd)
681{
682 int ret;
683 struct lttng_viewer_connect reply, msg;
684
685 assert(cmd);
686
687 cmd->version_check_done = 1;
688
eea7556c
MD
689 health_code_update();
690
2f8f53af
DG
691 DBG("Viewer is establishing a connection to the relayd.");
692
693 ret = recv_request(cmd->sock, &msg, sizeof(msg));
694 if (ret < 0) {
d3e2ba59
JD
695 goto end;
696 }
697
eea7556c
MD
698 health_code_update();
699
d3e2ba59
JD
700 reply.major = RELAYD_VERSION_COMM_MAJOR;
701 reply.minor = RELAYD_VERSION_COMM_MINOR;
702
703 /* Major versions must be the same */
704 if (reply.major != be32toh(msg.major)) {
2f8f53af
DG
705 DBG("Incompatible major versions ([relayd] %u vs [client] %u)",
706 reply.major, be32toh(msg.major));
72180669 707 ret = -1;
d3e2ba59
JD
708 goto end;
709 }
710
711 cmd->major = reply.major;
712 /* We adapt to the lowest compatible version */
713 if (reply.minor <= be32toh(msg.minor)) {
714 cmd->minor = reply.minor;
715 } else {
716 cmd->minor = be32toh(msg.minor);
717 }
718
719 if (be32toh(msg.type) == VIEWER_CLIENT_COMMAND) {
720 cmd->type = RELAY_VIEWER_COMMAND;
721 } else if (be32toh(msg.type) == VIEWER_CLIENT_NOTIFICATION) {
722 cmd->type = RELAY_VIEWER_NOTIFICATION;
723 } else {
724 ERR("Unknown connection type : %u", be32toh(msg.type));
725 ret = -1;
726 goto end;
727 }
728
729 reply.major = htobe32(reply.major);
730 reply.minor = htobe32(reply.minor);
731 if (cmd->type == RELAY_VIEWER_COMMAND) {
732 reply.viewer_session_id = htobe64(++last_relay_viewer_session_id);
733 }
eea7556c
MD
734
735 health_code_update();
736
2f8f53af 737 ret = send_response(cmd->sock, &reply, sizeof(reply));
d3e2ba59 738 if (ret < 0) {
2f8f53af 739 goto end;
d3e2ba59
JD
740 }
741
eea7556c
MD
742 health_code_update();
743
d3e2ba59
JD
744 DBG("Version check done using protocol %u.%u", cmd->major, cmd->minor);
745 ret = 0;
746
747end:
748 return ret;
749}
750
751/*
752 * Send the viewer the list of current sessions.
753 *
754 * Return 0 on success or else a negative value.
755 */
756static
757int viewer_list_sessions(struct relay_command *cmd,
758 struct lttng_ht *sessions_ht)
759{
760 int ret;
761 struct lttng_viewer_list_sessions session_list;
762 unsigned long count;
763 long approx_before, approx_after;
d3e2ba59
JD
764 struct lttng_ht_iter iter;
765 struct lttng_viewer_session send_session;
766 struct relay_session *session;
767
768 DBG("List sessions received");
769
d3e2ba59
JD
770 rcu_read_lock();
771 cds_lfht_count_nodes(sessions_ht->ht, &approx_before, &count, &approx_after);
772 session_list.sessions_count = htobe32(count);
773
eea7556c
MD
774 health_code_update();
775
2f8f53af 776 ret = send_response(cmd->sock, &session_list, sizeof(session_list));
d3e2ba59 777 if (ret < 0) {
d3e2ba59
JD
778 goto end_unlock;
779 }
780
eea7556c
MD
781 health_code_update();
782
2f8f53af
DG
783 cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, session,
784 session_n.node) {
eea7556c
MD
785 health_code_update();
786
d3e2ba59
JD
787 strncpy(send_session.session_name, session->session_name,
788 sizeof(send_session.session_name));
789 strncpy(send_session.hostname, session->hostname,
790 sizeof(send_session.hostname));
791 send_session.id = htobe64(session->id);
792 send_session.live_timer = htobe32(session->live_timer);
2a174661 793 send_session.clients = htobe32(session->viewer_refcount);
87b576ec 794 send_session.streams = htobe32(session->stream_count);
d3e2ba59 795
eea7556c
MD
796 health_code_update();
797
2f8f53af 798 ret = send_response(cmd->sock, &send_session, sizeof(send_session));
d3e2ba59 799 if (ret < 0) {
d3e2ba59
JD
800 goto end_unlock;
801 }
802 }
eea7556c
MD
803 health_code_update();
804
d3e2ba59
JD
805 rcu_read_unlock();
806 ret = 0;
807 goto end;
808
809end_unlock:
810 rcu_read_unlock();
811
812end:
d3e2ba59
JD
813 return ret;
814}
815
80e8027a
JD
816/*
817 * Send the viewer the list of current sessions.
818 */
819static
820int viewer_get_new_streams(struct relay_command *cmd,
821 struct lttng_ht *sessions_ht)
822{
823 int ret, send_streams = 0;
2f8f53af 824 uint32_t nb_created = 0, nb_unsent = 0, nb_streams = 0;
80e8027a
JD
825 struct lttng_viewer_new_streams_request request;
826 struct lttng_viewer_new_streams_response response;
80e8027a
JD
827 struct relay_session *session;
828
829 assert(cmd);
830 assert(sessions_ht);
831
832 DBG("Get new streams received");
833
80e8027a
JD
834 health_code_update();
835
2f8f53af
DG
836 /* Receive the request from the connected client. */
837 ret = recv_request(cmd->sock, &request, sizeof(request));
838 if (ret < 0) {
80e8027a
JD
839 goto error;
840 }
841
842 health_code_update();
843
844 rcu_read_lock();
2f8f53af
DG
845 session = session_find_by_id(sessions_ht, be64toh(request.session_id));
846 if (!session) {
80e8027a
JD
847 DBG("Relay session %" PRIu64 " not found",
848 be64toh(request.session_id));
849 response.status = htobe32(VIEWER_NEW_STREAMS_ERR);
850 goto send_reply;
851 }
852
80e8027a
JD
853 if (cmd->session_id == session->id) {
854 /* We confirmed the viewer is asking for the same session. */
855 send_streams = 1;
856 response.status = htobe32(VIEWER_NEW_STREAMS_OK);
857 } else {
858 send_streams = 0;
859 response.status = htobe32(VIEWER_NEW_STREAMS_ERR);
860 goto send_reply;
861 }
862
2f8f53af
DG
863 if (!send_streams) {
864 goto send_reply;
80e8027a 865 }
80e8027a 866
2f8f53af
DG
867 ret = make_viewer_streams(session, VIEWER_SEEK_LAST, NULL, &nb_unsent,
868 &nb_created);
869 if (ret < 0) {
870 goto end_unlock;
871 }
872 /* Only send back the newly created streams with the unsent ones. */
873 nb_streams = nb_created + nb_unsent;
80e8027a
JD
874 response.streams_count = htobe32(nb_streams);
875
876send_reply:
877 health_code_update();
2f8f53af 878 ret = send_response(cmd->sock, &response, sizeof(response));
80e8027a 879 if (ret < 0) {
80e8027a
JD
880 goto end_unlock;
881 }
882 health_code_update();
883
884 /*
885 * Unknown or empty session, just return gracefully, the viewer knows what
886 * is happening.
887 */
888 if (!send_streams || !nb_streams) {
889 ret = 0;
890 goto end_unlock;
891 }
892
2f8f53af
DG
893 /*
894 * Send stream and *DON'T* ignore the sent flag so every viewer streams
895 * that were not sent from that point will be sent to the viewer.
896 */
897 ret = send_viewer_streams(cmd->sock, session, 0);
898 if (ret < 0) {
899 goto end_unlock;
80e8027a
JD
900 }
901
80e8027a
JD
902end_unlock:
903 rcu_read_unlock();
80e8027a
JD
904error:
905 return ret;
906}
907
d3e2ba59
JD
908/*
909 * Send the viewer the list of current sessions.
910 */
911static
912int viewer_attach_session(struct relay_command *cmd,
92c6ca54 913 struct lttng_ht *sessions_ht)
d3e2ba59 914{
2f8f53af
DG
915 int send_streams = 0;
916 ssize_t ret;
80e8027a 917 uint32_t nb_streams = 0;
2f8f53af 918 enum lttng_viewer_seek seek_type;
d3e2ba59
JD
919 struct lttng_viewer_attach_session_request request;
920 struct lttng_viewer_attach_session_response response;
d3e2ba59
JD
921 struct relay_session *session;
922
923 assert(cmd);
924 assert(sessions_ht);
d3e2ba59 925
eea7556c
MD
926 health_code_update();
927
2f8f53af
DG
928 /* Receive the request from the connected client. */
929 ret = recv_request(cmd->sock, &request, sizeof(request));
930 if (ret < 0) {
d3e2ba59
JD
931 goto error;
932 }
933
eea7556c
MD
934 health_code_update();
935
d3e2ba59 936 rcu_read_lock();
2f8f53af
DG
937 session = session_find_by_id(sessions_ht, be64toh(request.session_id));
938 if (!session) {
d3e2ba59
JD
939 DBG("Relay session %" PRIu64 " not found",
940 be64toh(request.session_id));
941 response.status = htobe32(VIEWER_ATTACH_UNK);
942 goto send_reply;
943 }
2a174661
DG
944 session_viewer_attach(session);
945 DBG("Attach session ID %" PRIu64 " received", be64toh(request.session_id));
d3e2ba59 946
2a174661 947 if (uatomic_read(&session->viewer_refcount) > 1) {
d3e2ba59
JD
948 DBG("Already a viewer attached");
949 response.status = htobe32(VIEWER_ATTACH_ALREADY);
2a174661 950 session_viewer_detach(session);
d3e2ba59
JD
951 goto send_reply;
952 } else if (session->live_timer == 0) {
953 DBG("Not live session");
954 response.status = htobe32(VIEWER_ATTACH_NOT_LIVE);
955 goto send_reply;
956 } else {
d3e2ba59
JD
957 send_streams = 1;
958 response.status = htobe32(VIEWER_ATTACH_OK);
b92fdc2b 959 cmd->session_id = session->id;
d3e2ba59
JD
960 cmd->session = session;
961 }
962
963 switch (be32toh(request.seek)) {
964 case VIEWER_SEEK_BEGINNING:
d3e2ba59 965 case VIEWER_SEEK_LAST:
2f8f53af 966 seek_type = be32toh(request.seek);
d3e2ba59
JD
967 break;
968 default:
969 ERR("Wrong seek parameter");
970 response.status = htobe32(VIEWER_ATTACH_SEEK_ERR);
971 send_streams = 0;
972 goto send_reply;
973 }
974
2f8f53af
DG
975 if (!send_streams) {
976 goto send_reply;
977 }
a4baae1b 978
2f8f53af
DG
979 ret = make_viewer_streams(session, seek_type, &nb_streams, NULL, NULL);
980 if (ret < 0) {
981 goto end_unlock;
d3e2ba59 982 }
2f8f53af 983 response.streams_count = htobe32(nb_streams);
d3e2ba59
JD
984
985send_reply:
eea7556c 986 health_code_update();
2f8f53af 987 ret = send_response(cmd->sock, &response, sizeof(response));
d3e2ba59 988 if (ret < 0) {
d3e2ba59
JD
989 goto end_unlock;
990 }
eea7556c 991 health_code_update();
d3e2ba59
JD
992
993 /*
157df586 994 * Unknown or empty session, just return gracefully, the viewer knows what
d3e2ba59
JD
995 * is happening.
996 */
157df586 997 if (!send_streams || !nb_streams) {
d3e2ba59
JD
998 ret = 0;
999 goto end_unlock;
1000 }
1001
2f8f53af
DG
1002 /* Send stream and ignore the sent flag. */
1003 ret = send_viewer_streams(cmd->sock, session, 1);
1004 if (ret < 0) {
1005 goto end_unlock;
d3e2ba59 1006 }
d3e2ba59
JD
1007
1008end_unlock:
1009 rcu_read_unlock();
4a9daf17
JD
1010error:
1011 return ret;
1012}
1013
d3e2ba59
JD
1014/*
1015 * Send the next index for a stream.
1016 *
1017 * Return 0 on success or else a negative value.
1018 */
1019static
1020int viewer_get_next_index(struct relay_command *cmd,
92c6ca54 1021 struct lttng_ht *sessions_ht)
d3e2ba59
JD
1022{
1023 int ret;
1024 struct lttng_viewer_get_next_index request_index;
1025 struct lttng_viewer_index viewer_index;
50adc264 1026 struct ctf_packet_index packet_index;
d3e2ba59
JD
1027 struct relay_viewer_stream *vstream;
1028 struct relay_stream *rstream;
2a174661
DG
1029 struct ctf_trace *ctf_trace;
1030 struct relay_session *session;
d3e2ba59
JD
1031
1032 assert(cmd);
d3e2ba59
JD
1033 assert(sessions_ht);
1034
1035 DBG("Viewer get next index");
1036
eea7556c 1037 health_code_update();
2f8f53af
DG
1038
1039 ret = recv_request(cmd->sock, &request_index, sizeof(request_index));
1040 if (ret < 0) {
d3e2ba59
JD
1041 goto end;
1042 }
eea7556c 1043 health_code_update();
d3e2ba59
JD
1044
1045 rcu_read_lock();
2a174661
DG
1046 session = session_find_by_id(sessions_ht, cmd->session_id);
1047 if (!session) {
1048 ret = -1;
1049 goto end_unlock;
1050 }
1051
2f8f53af 1052 vstream = viewer_stream_find_by_id(be64toh(request_index.stream_id));
d3e2ba59
JD
1053 if (!vstream) {
1054 ret = -1;
1055 goto end_unlock;
1056 }
1057
2a174661
DG
1058 ctf_trace = ctf_trace_find_by_path(session->ctf_traces_ht, vstream->path_name);
1059 assert(ctf_trace);
1060
d3e2ba59
JD
1061 memset(&viewer_index, 0, sizeof(viewer_index));
1062
1063 /*
1064 * The viewer should not ask for index on metadata stream.
1065 */
1066 if (vstream->metadata_flag) {
1067 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
1068 goto send_reply;
1069 }
1070
1071 /* First time, we open the index file */
1072 if (vstream->index_read_fd < 0) {
2f8f53af
DG
1073 ret = index_open(vstream->path_name, vstream->channel_name,
1074 vstream->tracefile_count, vstream->tracefile_count_current);
0e6830aa 1075 if (ret == -ENOENT) {
d3e2ba59
JD
1076 /*
1077 * The index is created only when the first data packet arrives, it
1078 * might not be ready at the beginning of the session
1079 */
1080 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1081 goto send_reply;
1082 } else if (ret < 0) {
1083 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
1084 goto send_reply;
1085 }
2f8f53af 1086 vstream->index_read_fd = ret;
d3e2ba59
JD
1087 }
1088
2a174661
DG
1089 rstream = stream_find_by_id(relay_streams_ht, vstream->stream_handle);
1090 assert(rstream);
1091
1092 if (!rstream->close_flag) {
6b6b9a5a
JD
1093 if (vstream->abort_flag) {
1094 /* Rotate on abort (overwrite). */
1095 DBG("Viewer rotate because of overwrite");
2f8f53af 1096 ret = viewer_stream_rotate(vstream, rstream);
6b6b9a5a
JD
1097 if (ret < 0) {
1098 goto end_unlock;
a020f610
JD
1099 } else if (ret == 1) {
1100 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
2f8f53af 1101 viewer_stream_delete(vstream);
2a174661 1102 viewer_stream_destroy(ctf_trace, vstream);
a020f610 1103 goto send_reply;
6b6b9a5a 1104 }
2a174661 1105 /* ret == 0 means successful so we continue. */
6b6b9a5a 1106 }
2a174661 1107
6b6b9a5a 1108 pthread_mutex_lock(&rstream->viewer_stream_rotation_lock);
cef0f7d5
JD
1109 if (rstream->tracefile_count_current == vstream->tracefile_count_current) {
1110 if (rstream->beacon_ts_end != -1ULL &&
1111 vstream->last_sent_index == rstream->total_index_received) {
1112 viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE);
1113 viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end);
1114 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1115 goto send_reply;
cef0f7d5
JD
1116 } else if (rstream->total_index_received <= vstream->last_sent_index
1117 && !vstream->close_write_flag) {
2a174661
DG
1118 /*
1119 * Reader and writer are working in the same tracefile, so we care
1120 * about the number of index received and sent. Otherwise, we read
1121 * up to EOF.
1122 */
cef0f7d5
JD
1123 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
1124 /* No new index to send, retry later. */
1125 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1126 goto send_reply;
1127 }
d3e2ba59 1128 }
6b6b9a5a 1129 pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
2a174661 1130 } else if (rstream->close_flag && vstream->close_write_flag &&
d3e2ba59 1131 vstream->total_index_received == vstream->last_sent_index) {
6b6b9a5a 1132 /* Last index sent and current tracefile closed in write */
d3e2ba59 1133 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
2f8f53af 1134 viewer_stream_delete(vstream);
2a174661 1135 viewer_stream_destroy(ctf_trace, vstream);
d3e2ba59 1136 goto send_reply;
6b6b9a5a
JD
1137 } else {
1138 vstream->close_write_flag = 1;
d3e2ba59
JD
1139 }
1140
2a174661
DG
1141 if (!ctf_trace->metadata_received ||
1142 ctf_trace->metadata_received > ctf_trace->metadata_sent) {
d3e2ba59
JD
1143 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
1144 }
1145
4a9daf17
JD
1146 ret = check_new_streams(vstream->session_id, sessions_ht);
1147 if (ret < 0) {
1148 goto end_unlock;
1149 } else if (ret == 1) {
1150 viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_STREAM;
1151 }
1152
cef0f7d5
JD
1153 pthread_mutex_lock(&vstream->overwrite_lock);
1154 if (vstream->abort_flag) {
1155 /*
2a174661 1156 * The file is being overwritten by the writer, we cannot * use it.
cef0f7d5
JD
1157 */
1158 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1159 pthread_mutex_unlock(&vstream->overwrite_lock);
2f8f53af 1160 ret = viewer_stream_rotate(vstream, rstream);
cef0f7d5
JD
1161 if (ret < 0) {
1162 goto end_unlock;
a020f610
JD
1163 } else if (ret == 1) {
1164 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
2f8f53af 1165 viewer_stream_delete(vstream);
2a174661 1166 viewer_stream_destroy(ctf_trace, vstream);
a020f610 1167 goto send_reply;
cef0f7d5
JD
1168 }
1169 goto send_reply;
1170 }
2f8f53af 1171
6cd525e8
MD
1172 ret = lttng_read(vstream->index_read_fd, &packet_index,
1173 sizeof(packet_index));
cef0f7d5 1174 pthread_mutex_unlock(&vstream->overwrite_lock);
d3e2ba59 1175 if (ret < sizeof(packet_index)) {
6b6b9a5a
JD
1176 /*
1177 * The tracefile is closed in write, so we read up to EOF.
1178 */
1179 if (vstream->close_write_flag == 1) {
1180 viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
1181 /* Rotate on normal EOF */
2f8f53af 1182 ret = viewer_stream_rotate(vstream, rstream);
6b6b9a5a
JD
1183 if (ret < 0) {
1184 goto end_unlock;
a020f610
JD
1185 } else if (ret == 1) {
1186 viewer_index.status = htobe32(VIEWER_INDEX_HUP);
2f8f53af 1187 viewer_stream_delete(vstream);
2a174661 1188 viewer_stream_destroy(ctf_trace, vstream);
a020f610 1189 goto send_reply;
6b6b9a5a
JD
1190 }
1191 } else {
2f8f53af 1192 PERROR("Relay reading index file %d", vstream->index_read_fd);
cef0f7d5 1193 viewer_index.status = htobe32(VIEWER_INDEX_ERR);
6b6b9a5a
JD
1194 }
1195 goto send_reply;
d3e2ba59
JD
1196 } else {
1197 viewer_index.status = htobe32(VIEWER_INDEX_OK);
1198 vstream->last_sent_index++;
1199 }
1200
1201 /*
1202 * Indexes are stored in big endian, no need to switch before sending.
1203 */
1204 viewer_index.offset = packet_index.offset;
1205 viewer_index.packet_size = packet_index.packet_size;
1206 viewer_index.content_size = packet_index.content_size;
1207 viewer_index.timestamp_begin = packet_index.timestamp_begin;
1208 viewer_index.timestamp_end = packet_index.timestamp_end;
1209 viewer_index.events_discarded = packet_index.events_discarded;
1210 viewer_index.stream_id = packet_index.stream_id;
1211
1212send_reply:
1213 viewer_index.flags = htobe32(viewer_index.flags);
eea7556c 1214 health_code_update();
2f8f53af
DG
1215
1216 ret = send_response(cmd->sock, &viewer_index, sizeof(viewer_index));
d3e2ba59 1217 if (ret < 0) {
d3e2ba59
JD
1218 goto end_unlock;
1219 }
eea7556c 1220 health_code_update();
d3e2ba59 1221
2f8f53af 1222 DBG("Index %" PRIu64 " for stream %" PRIu64 " sent",
d3e2ba59
JD
1223 vstream->last_sent_index, vstream->stream_handle);
1224
1225end_unlock:
1226 rcu_read_unlock();
1227
d3e2ba59
JD
1228end:
1229 return ret;
1230}
1231
1232/*
1233 * Send the next index for a stream
1234 *
1235 * Return 0 on success or else a negative value.
1236 */
1237static
4a9daf17
JD
1238int viewer_get_packet(struct relay_command *cmd,
1239 struct lttng_ht *sessions_ht)
d3e2ba59
JD
1240{
1241 int ret, send_data = 0;
1242 char *data = NULL;
1243 uint32_t len = 0;
1244 ssize_t read_len;
1245 struct lttng_viewer_get_packet get_packet_info;
1246 struct lttng_viewer_trace_packet reply;
1247 struct relay_viewer_stream *stream;
2a174661 1248 struct ctf_trace *ctf_trace;
d3e2ba59
JD
1249
1250 assert(cmd);
d3e2ba59
JD
1251
1252 DBG2("Relay get data packet");
1253
eea7556c 1254 health_code_update();
2f8f53af
DG
1255
1256 ret = recv_request(cmd->sock, &get_packet_info, sizeof(get_packet_info));
1257 if (ret < 0) {
d3e2ba59
JD
1258 goto end;
1259 }
eea7556c 1260 health_code_update();
d3e2ba59 1261
0233a6a5
DG
1262 /* From this point on, the error label can be reached. */
1263 memset(&reply, 0, sizeof(reply));
1264
d3e2ba59 1265 rcu_read_lock();
2f8f53af 1266 stream = viewer_stream_find_by_id(be64toh(get_packet_info.stream_id));
d3e2ba59
JD
1267 if (!stream) {
1268 goto error;
1269 }
2a174661
DG
1270
1271 ctf_trace = ctf_trace_find_by_path(cmd->session->ctf_traces_ht,
1272 stream->path_name);
1273 assert(ctf_trace);
d3e2ba59
JD
1274
1275 /*
1276 * First time we read this stream, we need open the tracefile, we should
1277 * only arrive here if an index has already been sent to the viewer, so the
1278 * tracefile must exist, if it does not it is a fatal error.
1279 */
1280 if (stream->read_fd < 0) {
1281 char fullpath[PATH_MAX];
1282
6b6b9a5a
JD
1283 if (stream->tracefile_count > 0) {
1284 ret = snprintf(fullpath, PATH_MAX, "%s/%s_%" PRIu64, stream->path_name,
1285 stream->channel_name,
1286 stream->tracefile_count_current);
1287 } else {
1288 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1289 stream->channel_name);
1290 }
d3e2ba59
JD
1291 if (ret < 0) {
1292 goto error;
1293 }
1294 ret = open(fullpath, O_RDONLY);
1295 if (ret < 0) {
1296 PERROR("Relay opening trace file");
1297 goto error;
1298 }
1299 stream->read_fd = ret;
1300 }
1301
2a174661
DG
1302 if (!ctf_trace->metadata_received ||
1303 ctf_trace->metadata_received > ctf_trace->metadata_sent) {
d3e2ba59
JD
1304 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1305 reply.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
d3e2ba59
JD
1306 goto send_reply;
1307 }
1308
4a9daf17
JD
1309 ret = check_new_streams(stream->session_id, sessions_ht);
1310 if (ret < 0) {
1311 goto end_unlock;
1312 } else if (ret == 1) {
1313 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1314 reply.flags |= LTTNG_VIEWER_FLAG_NEW_STREAM;
1315 goto send_reply;
1316 }
1317
d3e2ba59
JD
1318 len = be32toh(get_packet_info.len);
1319 data = zmalloc(len);
1320 if (!data) {
1321 PERROR("relay data zmalloc");
1322 goto error;
1323 }
1324
1325 ret = lseek(stream->read_fd, be64toh(get_packet_info.offset), SEEK_SET);
1326 if (ret < 0) {
6b6b9a5a
JD
1327 /*
1328 * If the read fd was closed by the streaming side, the
1329 * abort_flag will be set to 1, otherwise it is an error.
1330 */
1331 if (stream->abort_flag == 0) {
1332 PERROR("lseek");
1333 goto error;
1334 }
1335 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1336 goto send_reply;
d3e2ba59 1337 }
6cd525e8
MD
1338 read_len = lttng_read(stream->read_fd, data, len);
1339 if (read_len < len) {
6b6b9a5a
JD
1340 /*
1341 * If the read fd was closed by the streaming side, the
1342 * abort_flag will be set to 1, otherwise it is an error.
1343 */
1344 if (stream->abort_flag == 0) {
1345 PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64,
1346 stream->read_fd,
1347 be64toh(get_packet_info.offset));
1348 goto error;
1349 } else {
1350 reply.status = htobe32(VIEWER_GET_PACKET_EOF);
1351 goto send_reply;
1352 }
d3e2ba59
JD
1353 }
1354 reply.status = htobe32(VIEWER_GET_PACKET_OK);
1355 reply.len = htobe32(len);
1356 send_data = 1;
1357 goto send_reply;
1358
1359error:
1360 reply.status = htobe32(VIEWER_GET_PACKET_ERR);
1361
1362send_reply:
1363 reply.flags = htobe32(reply.flags);
eea7556c
MD
1364
1365 health_code_update();
2f8f53af
DG
1366
1367 ret = send_response(cmd->sock, &reply, sizeof(reply));
d3e2ba59 1368 if (ret < 0) {
d3e2ba59
JD
1369 goto end_unlock;
1370 }
eea7556c 1371 health_code_update();
d3e2ba59
JD
1372
1373 if (send_data) {
eea7556c 1374 health_code_update();
2f8f53af 1375 ret = send_response(cmd->sock, data, len);
d3e2ba59 1376 if (ret < 0) {
d3e2ba59
JD
1377 goto end_unlock;
1378 }
eea7556c 1379 health_code_update();
d3e2ba59
JD
1380 }
1381
1382 DBG("Sent %u bytes for stream %" PRIu64, len,
1383 be64toh(get_packet_info.stream_id));
1384
1385end_unlock:
1386 free(data);
1387 rcu_read_unlock();
1388
1389end:
1390 return ret;
1391}
1392
1393/*
1394 * Send the session's metadata
1395 *
1396 * Return 0 on success else a negative value.
1397 */
1398static
92c6ca54 1399int viewer_get_metadata(struct relay_command *cmd)
d3e2ba59
JD
1400{
1401 int ret = 0;
1402 ssize_t read_len;
1403 uint64_t len = 0;
1404 char *data = NULL;
1405 struct lttng_viewer_get_metadata request;
1406 struct lttng_viewer_metadata_packet reply;
1407 struct relay_viewer_stream *stream;
2a174661 1408 struct ctf_trace *ctf_trace;
d3e2ba59
JD
1409
1410 assert(cmd);
d3e2ba59
JD
1411
1412 DBG("Relay get metadata");
1413
eea7556c 1414 health_code_update();
2f8f53af
DG
1415
1416 ret = recv_request(cmd->sock, &request, sizeof(request));
1417 if (ret < 0) {
d3e2ba59
JD
1418 goto end;
1419 }
eea7556c 1420 health_code_update();
d3e2ba59
JD
1421
1422 rcu_read_lock();
2f8f53af 1423 stream = viewer_stream_find_by_id(be64toh(request.stream_id));
d3e2ba59
JD
1424 if (!stream || !stream->metadata_flag) {
1425 ERR("Invalid metadata stream");
1426 goto error;
1427 }
d3e2ba59 1428
2a174661
DG
1429 ctf_trace = ctf_trace_find_by_path(cmd->session->ctf_traces_ht,
1430 stream->path_name);
1431 assert(ctf_trace);
1432 assert(ctf_trace->metadata_sent <= ctf_trace->metadata_received);
1433
1434 len = ctf_trace->metadata_received - ctf_trace->metadata_sent;
d3e2ba59
JD
1435 if (len == 0) {
1436 reply.status = htobe32(VIEWER_NO_NEW_METADATA);
1437 goto send_reply;
1438 }
1439
1440 /* first time, we open the metadata file */
1441 if (stream->read_fd < 0) {
1442 char fullpath[PATH_MAX];
1443
1444 ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
1445 stream->channel_name);
1446 if (ret < 0) {
1447 goto error;
1448 }
1449 ret = open(fullpath, O_RDONLY);
1450 if (ret < 0) {
1451 PERROR("Relay opening metadata file");
1452 goto error;
1453 }
1454 stream->read_fd = ret;
1455 }
1456
1457 reply.len = htobe64(len);
1458 data = zmalloc(len);
1459 if (!data) {
1460 PERROR("viewer metadata zmalloc");
1461 goto error;
1462 }
1463
6cd525e8
MD
1464 read_len = lttng_read(stream->read_fd, data, len);
1465 if (read_len < len) {
d3e2ba59
JD
1466 PERROR("Relay reading metadata file");
1467 goto error;
1468 }
2a174661 1469 ctf_trace->metadata_sent += read_len;
d3e2ba59
JD
1470 reply.status = htobe32(VIEWER_METADATA_OK);
1471 goto send_reply;
1472
1473error:
1474 reply.status = htobe32(VIEWER_METADATA_ERR);
1475
1476send_reply:
eea7556c 1477 health_code_update();
2f8f53af 1478 ret = send_response(cmd->sock, &reply, sizeof(reply));
d3e2ba59 1479 if (ret < 0) {
d3e2ba59
JD
1480 goto end_unlock;
1481 }
eea7556c 1482 health_code_update();
d3e2ba59
JD
1483
1484 if (len > 0) {
2f8f53af 1485 ret = send_response(cmd->sock, data, len);
d3e2ba59 1486 if (ret < 0) {
d3e2ba59
JD
1487 goto end_unlock;
1488 }
1489 }
1490
1491 DBG("Sent %" PRIu64 " bytes of metadata for stream %" PRIu64, len,
1492 be64toh(request.stream_id));
1493
1494 DBG("Metadata sent");
1495
1496end_unlock:
1497 free(data);
1498 rcu_read_unlock();
1499end:
1500 return ret;
1501}
1502
1503/*
1504 * live_relay_unknown_command: send -1 if received unknown command
1505 */
1506static
1507void live_relay_unknown_command(struct relay_command *cmd)
1508{
1509 struct lttcomm_relayd_generic_reply reply;
d3e2ba59
JD
1510
1511 reply.ret_code = htobe32(LTTNG_ERR_UNK);
2f8f53af 1512 (void) send_response(cmd->sock, &reply, sizeof(reply));
d3e2ba59
JD
1513}
1514
1515/*
1516 * Process the commands received on the control socket
1517 */
1518static
1519int process_control(struct lttng_viewer_cmd *recv_hdr,
92c6ca54 1520 struct relay_command *cmd, struct lttng_ht *sessions_ht)
d3e2ba59
JD
1521{
1522 int ret = 0;
2f8f53af
DG
1523 uint32_t msg_value;
1524
1525 assert(recv_hdr);
1526 assert(cmd);
1527 assert(sessions_ht);
1528
1529 msg_value = be32toh(recv_hdr->cmd);
1530
1531 /*
1532 * Make sure we've done the version check before any command other then a
1533 * new client connection.
1534 */
1535 if (msg_value != VIEWER_CONNECT && !cmd->version_check_done) {
1536 ERR("Viewer cmd value %" PRIu32 " before version check", msg_value);
1537 ret = -1;
1538 goto end;
1539 }
d3e2ba59 1540
2f8f53af 1541 switch (msg_value) {
d3e2ba59
JD
1542 case VIEWER_CONNECT:
1543 ret = viewer_connect(cmd);
1544 break;
1545 case VIEWER_LIST_SESSIONS:
1546 ret = viewer_list_sessions(cmd, sessions_ht);
1547 break;
1548 case VIEWER_ATTACH_SESSION:
92c6ca54 1549 ret = viewer_attach_session(cmd, sessions_ht);
d3e2ba59
JD
1550 break;
1551 case VIEWER_GET_NEXT_INDEX:
92c6ca54 1552 ret = viewer_get_next_index(cmd, sessions_ht);
d3e2ba59
JD
1553 break;
1554 case VIEWER_GET_PACKET:
4a9daf17 1555 ret = viewer_get_packet(cmd, sessions_ht);
d3e2ba59
JD
1556 break;
1557 case VIEWER_GET_METADATA:
92c6ca54 1558 ret = viewer_get_metadata(cmd);
d3e2ba59 1559 break;
80e8027a
JD
1560 case VIEWER_GET_NEW_STREAMS:
1561 ret = viewer_get_new_streams(cmd, sessions_ht);
1562 break;
d3e2ba59
JD
1563 default:
1564 ERR("Received unknown viewer command (%u)", be32toh(recv_hdr->cmd));
1565 live_relay_unknown_command(cmd);
1566 ret = -1;
1567 goto end;
1568 }
1569
1570end:
1571 return ret;
1572}
1573
/*
 * Remove a file descriptor from the poll set and close it. A close failure
 * is only logged.
 */
static
void cleanup_poll_connection(struct lttng_poll_event *events, int pollfd)
{
	assert(events);

	lttng_poll_del(events, pollfd);

	if (close(pollfd) < 0) {
		ERR("Closing pollfd %d", pollfd);
	}
}
1588
1589/*
1590 * Create and add connection to the given hash table.
1591 *
1592 * Return poll add value or else -1 on error.
1593 */
1594static
1595int add_connection(int fd, struct lttng_poll_event *events,
1596 struct lttng_ht *relay_connections_ht)
1597{
1598 int ret;
1599 struct relay_command *relay_connection;
1600
1601 assert(events);
1602 assert(relay_connections_ht);
1603
1604 relay_connection = zmalloc(sizeof(struct relay_command));
1605 if (relay_connection == NULL) {
1606 PERROR("Relay command zmalloc");
1607 goto error;
1608 }
1609
6cd525e8
MD
1610 ret = lttng_read(fd, relay_connection, sizeof(*relay_connection));
1611 if (ret < sizeof(*relay_connection)) {
d3e2ba59
JD
1612 PERROR("read relay cmd pipe");
1613 goto error_read;
1614 }
1615
1616 lttng_ht_node_init_ulong(&relay_connection->sock_n,
1617 (unsigned long) relay_connection->sock->fd);
1618 rcu_read_lock();
1619 lttng_ht_add_unique_ulong(relay_connections_ht,
1620 &relay_connection->sock_n);
1621 rcu_read_unlock();
1622
1623 return lttng_poll_add(events, relay_connection->sock->fd,
1624 LPOLLIN | LPOLLRDHUP);
1625
1626error_read:
1627 free(relay_connection);
1628error:
1629 return -1;
1630}
1631
1632static
1633void deferred_free_connection(struct rcu_head *head)
1634{
1635 struct relay_command *relay_connection =
1636 caa_container_of(head, struct relay_command, rcu_node);
1637
d3e2ba59
JD
1638 lttcomm_destroy_sock(relay_connection->sock);
1639 free(relay_connection);
1640}
1641
157df586
JD
1642/*
1643 * Delete all streams for a specific session ID.
1644 */
2a174661 1645static void destroy_viewer_streams_by_session(struct relay_session *session)
d3e2ba59 1646{
d3e2ba59 1647 struct relay_viewer_stream *stream;
d3e2ba59
JD
1648 struct lttng_ht_iter iter;
1649
2a174661
DG
1650 assert(session);
1651
d3e2ba59 1652 rcu_read_lock();
157df586
JD
1653 cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, stream,
1654 stream_n.node) {
2a174661 1655 struct ctf_trace *ctf_trace;
eea7556c 1656
2a174661
DG
1657 health_code_update();
1658 if (stream->session_id != session->id) {
d3e2ba59
JD
1659 continue;
1660 }
1661
2a174661
DG
1662 ctf_trace = ctf_trace_find_by_path(session->ctf_traces_ht,
1663 stream->path_name);
1664 assert(ctf_trace);
1665
2f8f53af 1666 viewer_stream_delete(stream);
157df586
JD
1667
1668 if (stream->metadata_flag) {
2a174661
DG
1669 ctf_trace->metadata_sent = 0;
1670 ctf_trace->viewer_metadata_stream = NULL;
d3e2ba59 1671 }
2a174661
DG
1672
1673 viewer_stream_destroy(ctf_trace, stream);
d3e2ba59
JD
1674 }
1675 rcu_read_unlock();
1676}
1677
2a174661
DG
1678static void try_destroy_streams(struct relay_session *session)
1679{
1680 struct ctf_trace *ctf_trace;
1681 struct lttng_ht_iter iter;
1682
1683 assert(session);
1684
1685 cds_lfht_for_each_entry(session->ctf_traces_ht->ht, &iter.iter, ctf_trace,
1686 node.node) {
1687 /* Attempt to destroy the ctf trace of that session. */
1688 ctf_trace_try_destroy(session, ctf_trace);
1689 }
1690}
1691
d3e2ba59
JD
1692/*
1693 * Delete and free a connection.
1694 *
1695 * RCU read side lock MUST be acquired.
1696 */
1697static
1698void del_connection(struct lttng_ht *relay_connections_ht,
2a174661
DG
1699 struct lttng_ht_iter *iter, struct relay_command *relay_connection,
1700 struct lttng_ht *sessions_ht)
d3e2ba59
JD
1701{
1702 int ret;
2a174661 1703 struct relay_session *session;
d3e2ba59
JD
1704
1705 assert(relay_connections_ht);
1706 assert(iter);
1707 assert(relay_connection);
2a174661 1708 assert(sessions_ht);
d3e2ba59 1709
157df586
JD
1710 DBG("Cleaning connection of session ID %" PRIu64,
1711 relay_connection->session_id);
1712
2a174661 1713 rcu_read_lock();
d3e2ba59
JD
1714 ret = lttng_ht_del(relay_connections_ht, iter);
1715 assert(!ret);
1716
2a174661
DG
1717 session = session_find_by_id(sessions_ht, relay_connection->session_id);
1718 if (session) {
1719 /*
1720 * Very important that this is done before destroying the session so we
1721 * can put back every viewer stream reference from the ctf_trace.
1722 */
1723 destroy_viewer_streams_by_session(session);
1724 try_destroy_streams(session);
1725 session_viewer_try_destroy(sessions_ht, session);
1726 }
1727 rcu_read_unlock();
d3e2ba59
JD
1728
1729 call_rcu(&relay_connection->rcu_node, deferred_free_connection);
1730}
1731
1732/*
1733 * This thread does the actual work
1734 */
1735static
1736void *thread_worker(void *data)
1737{
1738 int ret, err = -1;
1739 uint32_t nb_fd;
1740 struct relay_command *relay_connection;
1741 struct lttng_poll_event events;
1742 struct lttng_ht *relay_connections_ht;
1743 struct lttng_ht_node_ulong *node;
1744 struct lttng_ht_iter iter;
1745 struct lttng_viewer_cmd recv_hdr;
1746 struct relay_local_data *relay_ctx = (struct relay_local_data *) data;
1747 struct lttng_ht *sessions_ht = relay_ctx->sessions_ht;
d3e2ba59
JD
1748
1749 DBG("[thread] Live viewer relay worker started");
1750
1751 rcu_register_thread();
1752
eea7556c
MD
1753 health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);
1754
9b5e0863
MD
1755 if (testpoint(relayd_thread_live_worker)) {
1756 goto error_testpoint;
1757 }
1758
d3e2ba59
JD
1759 /* table of connections indexed on socket */
1760 relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
1761 if (!relay_connections_ht) {
1762 goto relay_connections_ht_error;
1763 }
1764
1765 ret = create_thread_poll_set(&events, 2);
1766 if (ret < 0) {
1767 goto error_poll_create;
1768 }
1769
1770 ret = lttng_poll_add(&events, live_relay_cmd_pipe[0], LPOLLIN | LPOLLRDHUP);
1771 if (ret < 0) {
1772 goto error;
1773 }
1774
1775restart:
1776 while (1) {
1777 int i;
1778
eea7556c
MD
1779 health_code_update();
1780
d3e2ba59
JD
1781 /* Infinite blocking call, waiting for transmission */
1782 DBG3("Relayd live viewer worker thread polling...");
eea7556c 1783 health_poll_entry();
d3e2ba59 1784 ret = lttng_poll_wait(&events, -1);
eea7556c 1785 health_poll_exit();
d3e2ba59
JD
1786 if (ret < 0) {
1787 /*
1788 * Restart interrupted system call.
1789 */
1790 if (errno == EINTR) {
1791 goto restart;
1792 }
1793 goto error;
1794 }
1795
1796 nb_fd = ret;
1797
1798 /*
1799 * Process control. The control connection is prioritised so we don't
1800 * starve it with high throughput tracing data on the data
1801 * connection.
1802 */
1803 for (i = 0; i < nb_fd; i++) {
1804 /* Fetch once the poll data */
1805 uint32_t revents = LTTNG_POLL_GETEV(&events, i);
1806 int pollfd = LTTNG_POLL_GETFD(&events, i);
1807
eea7556c
MD
1808 health_code_update();
1809
d3e2ba59 1810 /* Thread quit pipe has been closed. Killing thread. */
2a174661 1811 ret = check_live_conn_pipe(pollfd, revents);
d3e2ba59
JD
1812 if (ret) {
1813 err = 0;
1814 goto exit;
1815 }
1816
1817 /* Inspect the relay cmd pipe for new connection */
1818 if (pollfd == live_relay_cmd_pipe[0]) {
1819 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
1820 ERR("Relay live pipe error");
1821 goto error;
1822 } else if (revents & LPOLLIN) {
1823 DBG("Relay live viewer command received");
1824 ret = add_connection(live_relay_cmd_pipe[0],
1825 &events, relay_connections_ht);
1826 if (ret < 0) {
1827 goto error;
1828 }
1829 }
1830 } else if (revents) {
1831 rcu_read_lock();
1832 lttng_ht_lookup(relay_connections_ht,
1833 (void *)((unsigned long) pollfd), &iter);
1834 node = lttng_ht_iter_get_node_ulong(&iter);
1835 if (node == NULL) {
1836 DBG2("Relay viewer sock %d not found", pollfd);
1837 rcu_read_unlock();
1838 goto error;
1839 }
1840 relay_connection = caa_container_of(node, struct relay_command,
1841 sock_n);
1842
1843 if (revents & (LPOLLERR)) {
d3e2ba59
JD
1844 cleanup_poll_connection(&events, pollfd);
1845 del_connection(relay_connections_ht, &iter,
2a174661 1846 relay_connection, relay_ctx->sessions_ht);
d3e2ba59
JD
1847 } else if (revents & (LPOLLHUP | LPOLLRDHUP)) {
1848 DBG("Viewer socket %d hung up", pollfd);
1849 cleanup_poll_connection(&events, pollfd);
1850 del_connection(relay_connections_ht, &iter,
2a174661 1851 relay_connection, relay_ctx->sessions_ht);
d3e2ba59
JD
1852 } else if (revents & LPOLLIN) {
1853 ret = relay_connection->sock->ops->recvmsg(
1854 relay_connection->sock, &recv_hdr,
1855 sizeof(struct lttng_viewer_cmd),
1856 0);
1857 /* connection closed */
1858 if (ret <= 0) {
1859 cleanup_poll_connection(&events, pollfd);
aaec7998 1860 del_connection(relay_connections_ht, &iter,
2a174661 1861 relay_connection, relay_ctx->sessions_ht);
d3e2ba59
JD
1862 DBG("Viewer control connection closed with %d",
1863 pollfd);
1864 } else {
1865 if (relay_connection->session) {
1866 DBG2("Relay viewer worker receiving data for "
1867 "session: %" PRIu64,
1868 relay_connection->session->id);
1869 }
1870 ret = process_control(&recv_hdr, relay_connection,
92c6ca54 1871 sessions_ht);
d3e2ba59
JD
1872 if (ret < 0) {
1873 /* Clear the session on error. */
1874 cleanup_poll_connection(&events, pollfd);
1875 del_connection(relay_connections_ht, &iter,
2a174661 1876 relay_connection, relay_ctx->sessions_ht);
d3e2ba59
JD
1877 DBG("Viewer connection closed with %d", pollfd);
1878 }
1879 }
1880 }
1881 rcu_read_unlock();
1882 }
1883 }
1884 }
1885
1886exit:
1887error:
1888 lttng_poll_clean(&events);
1889
1890 /* empty the hash table and free the memory */
1891 rcu_read_lock();
1892 cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) {
eea7556c
MD
1893 health_code_update();
1894
d3e2ba59
JD
1895 node = lttng_ht_iter_get_node_ulong(&iter);
1896 if (!node) {
1897 continue;
1898 }
1899
1900 relay_connection = caa_container_of(node, struct relay_command,
1901 sock_n);
2a174661
DG
1902 del_connection(relay_connections_ht, &iter, relay_connection,
1903 relay_ctx->sessions_ht);
d3e2ba59
JD
1904 }
1905 rcu_read_unlock();
1906error_poll_create:
1907 lttng_ht_destroy(relay_connections_ht);
1908relay_connections_ht_error:
1909 /* Close relay cmd pipes */
1910 utils_close_pipe(live_relay_cmd_pipe);
1911 if (err) {
1912 DBG("Viewer worker thread exited with error");
1913 }
1914 DBG("Viewer worker thread cleanup complete");
9b5e0863 1915error_testpoint:
eea7556c
MD
1916 if (err) {
1917 health_error();
1918 ERR("Health error occurred in %s", __func__);
1919 }
1920 health_unregister(health_relayd);
d3e2ba59
JD
1921 stop_threads();
1922 rcu_unregister_thread();
1923 return NULL;
1924}
1925
1926/*
1927 * Create the relay command pipe to wake thread_manage_apps.
1928 * Closed in cleanup().
1929 */
1930static int create_relay_cmd_pipe(void)
1931{
1932 int ret;
1933
1934 ret = utils_create_pipe_cloexec(live_relay_cmd_pipe);
1935
1936 return ret;
1937}
1938
aaec7998 1939void live_stop_threads(void)
d3e2ba59
JD
1940{
1941 int ret;
1942 void *status;
1943
1944 stop_threads();
1945
1946 ret = pthread_join(live_listener_thread, &status);
1947 if (ret != 0) {
1948 PERROR("pthread_join live listener");
1949 goto error; /* join error, exit without cleanup */
1950 }
1951
1952 ret = pthread_join(live_worker_thread, &status);
1953 if (ret != 0) {
1954 PERROR("pthread_join live worker");
1955 goto error; /* join error, exit without cleanup */
1956 }
1957
1958 ret = pthread_join(live_dispatcher_thread, &status);
1959 if (ret != 0) {
1960 PERROR("pthread_join live dispatcher");
1961 goto error; /* join error, exit without cleanup */
1962 }
1963
1964 cleanup();
1965
1966error:
1967 return;
1968}
1969
1970/*
1971 * main
1972 */
1973int live_start_threads(struct lttng_uri *uri,
0b242f62 1974 struct relay_local_data *relay_ctx)
d3e2ba59
JD
1975{
1976 int ret = 0;
1977 void *status;
1978 int is_root;
1979
1980 assert(uri);
1981 live_uri = uri;
1982
d3e2ba59
JD
1983 /* Check if daemon is UID = 0 */
1984 is_root = !getuid();
1985
1986 if (!is_root) {
1987 if (live_uri->port < 1024) {
1988 ERR("Need to be root to use ports < 1024");
1989 ret = -1;
1990 goto exit;
1991 }
1992 }
1993
1994 /* Setup the thread apps communication pipe. */
1995 if ((ret = create_relay_cmd_pipe()) < 0) {
1996 goto exit;
1997 }
1998
1999 /* Init relay command queue. */
2000 cds_wfq_init(&viewer_cmd_queue.queue);
2001
2002 /* Set up max poll set size */
2003 lttng_poll_set_max_size();
2004
2005 /* Setup the dispatcher thread */
2006 ret = pthread_create(&live_dispatcher_thread, NULL,
2007 thread_dispatcher, (void *) NULL);
2008 if (ret != 0) {
2009 PERROR("pthread_create viewer dispatcher");
2010 goto exit_dispatcher;
2011 }
2012
2013 /* Setup the worker thread */
2014 ret = pthread_create(&live_worker_thread, NULL,
2015 thread_worker, relay_ctx);
2016 if (ret != 0) {
2017 PERROR("pthread_create viewer worker");
2018 goto exit_worker;
2019 }
2020
2021 /* Setup the listener thread */
2022 ret = pthread_create(&live_listener_thread, NULL,
2023 thread_listener, (void *) NULL);
2024 if (ret != 0) {
2025 PERROR("pthread_create viewer listener");
2026 goto exit_listener;
2027 }
2028
2029 ret = 0;
2030 goto end;
2031
2032exit_listener:
2033 ret = pthread_join(live_listener_thread, &status);
2034 if (ret != 0) {
2035 PERROR("pthread_join live listener");
2036 goto error; /* join error, exit without cleanup */
2037 }
2038
2039exit_worker:
2040 ret = pthread_join(live_worker_thread, &status);
2041 if (ret != 0) {
2042 PERROR("pthread_join live worker");
2043 goto error; /* join error, exit without cleanup */
2044 }
2045
2046exit_dispatcher:
2047 ret = pthread_join(live_dispatcher_thread, &status);
2048 if (ret != 0) {
2049 PERROR("pthread_join live dispatcher");
2050 goto error; /* join error, exit without cleanup */
2051 }
2052
2053exit:
2054 cleanup();
2055
2056end:
2057error:
2058 return ret;
2059}
This page took 0.203975 seconds and 4 git commands to generate.