Fix: consumer should await for initial streams
[lttng-tools.git] / src / common / consumer.c
CommitLineData
3bd1e081
MD
1/*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
00e2e675 4 * 2012 - David Goulet <dgoulet@efficios.com>
3bd1e081 5 *
d14d33bf
AM
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
3bd1e081 9 *
d14d33bf
AM
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
3bd1e081 14 *
d14d33bf
AM
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
3bd1e081
MD
18 */
19
20#define _GNU_SOURCE
21#include <assert.h>
3bd1e081
MD
22#include <poll.h>
23#include <pthread.h>
24#include <stdlib.h>
25#include <string.h>
26#include <sys/mman.h>
27#include <sys/socket.h>
28#include <sys/types.h>
29#include <unistd.h>
77c7c900 30#include <inttypes.h>
3bd1e081 31
990570ed 32#include <common/common.h>
fb3a43a9
DG
33#include <common/utils.h>
34#include <common/compat/poll.h>
10a8a223 35#include <common/kernel-ctl/kernel-ctl.h>
00e2e675 36#include <common/sessiond-comm/relayd.h>
10a8a223
DG
37#include <common/sessiond-comm/sessiond-comm.h>
38#include <common/kernel-consumer/kernel-consumer.h>
00e2e675 39#include <common/relayd/relayd.h>
10a8a223
DG
40#include <common/ust-consumer/ust-consumer.h>
41
42#include "consumer.h"
3bd1e081
MD
43
44struct lttng_consumer_global_data consumer_data = {
3bd1e081
MD
45 .stream_count = 0,
46 .need_update = 1,
47 .type = LTTNG_CONSUMER_UNKNOWN,
48};
49
/* Timeout value controlling the grace period of the polling thread. */
int consumer_poll_timeout = -1;
52
/*
 * Quit flag read by the polling threads. It tells them to stop once all fds
 * have hung up. It is raised by consumer_thread_receive_fds when it notices
 * that every fd has hung up, and by lttng_consumer_should_exit(), which is
 * called from the signal handler.
 */
volatile int consumer_quit = 0;
60
61/*
62 * Find a stream. The consumer_data.lock must be locked during this
63 * call.
64 */
65static struct lttng_consumer_stream *consumer_find_stream(int key)
66{
e4421fec
DG
67 struct lttng_ht_iter iter;
68 struct lttng_ht_node_ulong *node;
69 struct lttng_consumer_stream *stream = NULL;
3bd1e081 70
7ad0a0cb
MD
71 /* Negative keys are lookup failures */
72 if (key < 0)
73 return NULL;
e4421fec 74
6065ceec
DG
75 rcu_read_lock();
76
e4421fec
DG
77 lttng_ht_lookup(consumer_data.stream_ht, (void *)((unsigned long) key),
78 &iter);
79 node = lttng_ht_iter_get_node_ulong(&iter);
80 if (node != NULL) {
81 stream = caa_container_of(node, struct lttng_consumer_stream, node);
3bd1e081 82 }
e4421fec 83
6065ceec
DG
84 rcu_read_unlock();
85
e4421fec 86 return stream;
3bd1e081
MD
87}
88
7ad0a0cb
MD
89static void consumer_steal_stream_key(int key)
90{
91 struct lttng_consumer_stream *stream;
92
04253271 93 rcu_read_lock();
7ad0a0cb 94 stream = consumer_find_stream(key);
04253271 95 if (stream) {
7ad0a0cb 96 stream->key = -1;
04253271
MD
97 /*
98 * We don't want the lookup to match, but we still need
99 * to iterate on this stream when iterating over the hash table. Just
100 * change the node key.
101 */
102 stream->node.key = -1;
103 }
104 rcu_read_unlock();
7ad0a0cb
MD
105}
106
3bd1e081
MD
107static struct lttng_consumer_channel *consumer_find_channel(int key)
108{
e4421fec
DG
109 struct lttng_ht_iter iter;
110 struct lttng_ht_node_ulong *node;
111 struct lttng_consumer_channel *channel = NULL;
3bd1e081 112
7ad0a0cb
MD
113 /* Negative keys are lookup failures */
114 if (key < 0)
115 return NULL;
e4421fec 116
6065ceec
DG
117 rcu_read_lock();
118
e4421fec
DG
119 lttng_ht_lookup(consumer_data.channel_ht, (void *)((unsigned long) key),
120 &iter);
121 node = lttng_ht_iter_get_node_ulong(&iter);
122 if (node != NULL) {
123 channel = caa_container_of(node, struct lttng_consumer_channel, node);
3bd1e081 124 }
e4421fec 125
6065ceec
DG
126 rcu_read_unlock();
127
e4421fec 128 return channel;
3bd1e081
MD
129}
130
7ad0a0cb
MD
131static void consumer_steal_channel_key(int key)
132{
133 struct lttng_consumer_channel *channel;
134
04253271 135 rcu_read_lock();
7ad0a0cb 136 channel = consumer_find_channel(key);
04253271 137 if (channel) {
7ad0a0cb 138 channel->key = -1;
04253271
MD
139 /*
140 * We don't want the lookup to match, but we still need
141 * to iterate on this channel when iterating over the hash table. Just
142 * change the node key.
143 */
144 channel->node.key = -1;
145 }
146 rcu_read_unlock();
7ad0a0cb
MD
147}
148
702b1ea4
MD
149static
150void consumer_free_stream(struct rcu_head *head)
151{
152 struct lttng_ht_node_ulong *node =
153 caa_container_of(head, struct lttng_ht_node_ulong, head);
154 struct lttng_consumer_stream *stream =
155 caa_container_of(node, struct lttng_consumer_stream, node);
156
157 free(stream);
158}
159
00e2e675
DG
160/*
161 * RCU protected relayd socket pair free.
162 */
163static void consumer_rcu_free_relayd(struct rcu_head *head)
164{
165 struct lttng_ht_node_ulong *node =
166 caa_container_of(head, struct lttng_ht_node_ulong, head);
167 struct consumer_relayd_sock_pair *relayd =
168 caa_container_of(node, struct consumer_relayd_sock_pair, node);
169
170 free(relayd);
171}
172
173/*
174 * Destroy and free relayd socket pair object.
175 *
176 * This function MUST be called with the consumer_data lock acquired.
177 */
d09e1200 178static void destroy_relayd(struct consumer_relayd_sock_pair *relayd)
00e2e675
DG
179{
180 int ret;
181 struct lttng_ht_iter iter;
182
173af62f
DG
183 if (relayd == NULL) {
184 return;
185 }
186
00e2e675
DG
187 DBG("Consumer destroy and close relayd socket pair");
188
189 iter.iter.node = &relayd->node.node;
190 ret = lttng_ht_del(consumer_data.relayd_ht, &iter);
173af62f
DG
191 if (ret != 0) {
192 /* We assume the relayd was already destroyed */
193 return;
194 }
00e2e675
DG
195
196 /* Close all sockets */
197 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
198 (void) relayd_close(&relayd->control_sock);
199 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
200 (void) relayd_close(&relayd->data_sock);
201
202 /* RCU free() call */
203 call_rcu(&relayd->node.head, consumer_rcu_free_relayd);
204}
205
a6ba4fe1
DG
206/*
207 * Flag a relayd socket pair for destruction. Destroy it if the refcount
208 * reaches zero.
209 *
210 * RCU read side lock MUST be aquired before calling this function.
211 */
212void consumer_flag_relayd_for_destroy(struct consumer_relayd_sock_pair *relayd)
213{
214 assert(relayd);
215
216 /* Set destroy flag for this object */
217 uatomic_set(&relayd->destroy_flag, 1);
218
219 /* Destroy the relayd if refcount is 0 */
220 if (uatomic_read(&relayd->refcount) == 0) {
d09e1200 221 destroy_relayd(relayd);
a6ba4fe1
DG
222 }
223}
224
3bd1e081
MD
225/*
226 * Remove a stream from the global list protected by a mutex. This
227 * function is also responsible for freeing its data structures.
228 */
229void consumer_del_stream(struct lttng_consumer_stream *stream)
230{
231 int ret;
e4421fec 232 struct lttng_ht_iter iter;
3bd1e081 233 struct lttng_consumer_channel *free_chan = NULL;
00e2e675
DG
234 struct consumer_relayd_sock_pair *relayd;
235
236 assert(stream);
3bd1e081
MD
237
238 pthread_mutex_lock(&consumer_data.lock);
239
240 switch (consumer_data.type) {
241 case LTTNG_CONSUMER_KERNEL:
242 if (stream->mmap_base != NULL) {
243 ret = munmap(stream->mmap_base, stream->mmap_len);
244 if (ret != 0) {
245 perror("munmap");
246 }
247 }
248 break;
7753dea8
MD
249 case LTTNG_CONSUMER32_UST:
250 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
251 lttng_ustconsumer_del_stream(stream);
252 break;
253 default:
254 ERR("Unknown consumer_data type");
255 assert(0);
256 goto end;
257 }
258
6065ceec 259 rcu_read_lock();
04253271
MD
260 iter.iter.node = &stream->node.node;
261 ret = lttng_ht_del(consumer_data.stream_ht, &iter);
262 assert(!ret);
e4421fec 263
6065ceec
DG
264 rcu_read_unlock();
265
3bd1e081
MD
266 if (consumer_data.stream_count <= 0) {
267 goto end;
268 }
269 consumer_data.stream_count--;
270 if (!stream) {
271 goto end;
272 }
273 if (stream->out_fd >= 0) {
4c462e79
MD
274 ret = close(stream->out_fd);
275 if (ret) {
276 PERROR("close");
277 }
3bd1e081 278 }
b5c5fc29 279 if (stream->wait_fd >= 0 && !stream->wait_fd_is_copy) {
4c462e79
MD
280 ret = close(stream->wait_fd);
281 if (ret) {
282 PERROR("close");
283 }
3bd1e081 284 }
2c1dd183 285 if (stream->shm_fd >= 0 && stream->wait_fd != stream->shm_fd) {
4c462e79
MD
286 ret = close(stream->shm_fd);
287 if (ret) {
288 PERROR("close");
289 }
3bd1e081 290 }
00e2e675
DG
291
292 /* Check and cleanup relayd */
b0b335c8 293 rcu_read_lock();
00e2e675
DG
294 relayd = consumer_find_relayd(stream->net_seq_idx);
295 if (relayd != NULL) {
b0b335c8
MD
296 uatomic_dec(&relayd->refcount);
297 assert(uatomic_read(&relayd->refcount) >= 0);
173af62f 298
3f8e211f
DG
299 /* Closing streams requires to lock the control socket. */
300 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
173af62f
DG
301 ret = relayd_send_close_stream(&relayd->control_sock,
302 stream->relayd_stream_id,
303 stream->next_net_seq_num - 1);
3f8e211f 304 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
173af62f 305 if (ret < 0) {
a4b92340
DG
306 DBG("Unable to close stream on the relayd. Continuing");
307 /*
308 * Continue here. There is nothing we can do for the relayd.
309 * Chances are that the relayd has closed the socket so we just
310 * continue cleaning up.
311 */
173af62f
DG
312 }
313
314 /* Both conditions are met, we destroy the relayd. */
315 if (uatomic_read(&relayd->refcount) == 0 &&
316 uatomic_read(&relayd->destroy_flag)) {
d09e1200 317 destroy_relayd(relayd);
00e2e675 318 }
00e2e675 319 }
b0b335c8 320 rcu_read_unlock();
00e2e675 321
c30aaa51
MD
322 uatomic_dec(&stream->chan->refcount);
323 if (!uatomic_read(&stream->chan->refcount)
324 && !uatomic_read(&stream->chan->nb_init_streams)) {
3bd1e081 325 free_chan = stream->chan;
00e2e675
DG
326 }
327
702b1ea4 328 call_rcu(&stream->node.head, consumer_free_stream);
3bd1e081
MD
329end:
330 consumer_data.need_update = 1;
331 pthread_mutex_unlock(&consumer_data.lock);
332
c30aaa51 333 if (free_chan) {
3bd1e081 334 consumer_del_channel(free_chan);
c30aaa51 335 }
3bd1e081
MD
336}
337
338struct lttng_consumer_stream *consumer_allocate_stream(
339 int channel_key, int stream_key,
340 int shm_fd, int wait_fd,
341 enum lttng_consumer_stream_state state,
342 uint64_t mmap_len,
343 enum lttng_event_output output,
6df2e2c9
MD
344 const char *path_name,
345 uid_t uid,
00e2e675
DG
346 gid_t gid,
347 int net_index,
348 int metadata_flag)
3bd1e081
MD
349{
350 struct lttng_consumer_stream *stream;
351 int ret;
352
effcf122 353 stream = zmalloc(sizeof(*stream));
3bd1e081
MD
354 if (stream == NULL) {
355 perror("malloc struct lttng_consumer_stream");
356 goto end;
357 }
358 stream->chan = consumer_find_channel(channel_key);
359 if (!stream->chan) {
360 perror("Unable to find channel key");
361 goto end;
362 }
363 stream->chan->refcount++;
364 stream->key = stream_key;
365 stream->shm_fd = shm_fd;
366 stream->wait_fd = wait_fd;
367 stream->out_fd = -1;
368 stream->out_fd_offset = 0;
369 stream->state = state;
370 stream->mmap_len = mmap_len;
371 stream->mmap_base = NULL;
372 stream->output = output;
6df2e2c9
MD
373 stream->uid = uid;
374 stream->gid = gid;
00e2e675
DG
375 stream->net_seq_idx = net_index;
376 stream->metadata_flag = metadata_flag;
377 strncpy(stream->path_name, path_name, sizeof(stream->path_name));
378 stream->path_name[sizeof(stream->path_name) - 1] = '\0';
e4421fec 379 lttng_ht_node_init_ulong(&stream->node, stream->key);
00e2e675 380 lttng_ht_node_init_ulong(&stream->waitfd_node, stream->wait_fd);
3bd1e081
MD
381
382 switch (consumer_data.type) {
383 case LTTNG_CONSUMER_KERNEL:
384 break;
7753dea8
MD
385 case LTTNG_CONSUMER32_UST:
386 case LTTNG_CONSUMER64_UST:
5af2f756 387 stream->cpu = stream->chan->cpucount++;
3bd1e081
MD
388 ret = lttng_ustconsumer_allocate_stream(stream);
389 if (ret) {
390 free(stream);
391 return NULL;
392 }
393 break;
394 default:
395 ERR("Unknown consumer_data type");
396 assert(0);
397 goto end;
398 }
c30aaa51
MD
399
400 /*
401 * When nb_init_streams reaches 0, we don't need to trigger any action in
402 * terms of destroying the associated channel, because the action that
403 * causes the count to become 0 also causes a stream to be added. The
404 * channel deletion will thus be triggered by the following removal of this
405 * stream.
406 */
407 if (uatomic_read(&stream->chan->nb_init_streams) > 0) {
408 uatomic_dec(&stream->chan->nb_init_streams);
409 }
410
411 DBG3("Allocated stream %s (key %d, shm_fd %d, wait_fd %d, mmap_len %llu,"
412 " out_fd %d, net_seq_idx %d)", stream->path_name, stream->key,
413 stream->shm_fd, stream->wait_fd,
414 (unsigned long long) stream->mmap_len, stream->out_fd,
00e2e675 415 stream->net_seq_idx);
c30aaa51 416
3bd1e081
MD
417end:
418 return stream;
419}
420
421/*
422 * Add a stream to the global list protected by a mutex.
423 */
424int consumer_add_stream(struct lttng_consumer_stream *stream)
425{
426 int ret = 0;
c77fc10a
DG
427 struct lttng_ht_node_ulong *node;
428 struct lttng_ht_iter iter;
00e2e675 429 struct consumer_relayd_sock_pair *relayd;
3bd1e081
MD
430
431 pthread_mutex_lock(&consumer_data.lock);
7ad0a0cb
MD
432 /* Steal stream identifier, for UST */
433 consumer_steal_stream_key(stream->key);
c77fc10a 434
b0b335c8 435 rcu_read_lock();
c77fc10a
DG
436 lttng_ht_lookup(consumer_data.stream_ht,
437 (void *)((unsigned long) stream->key), &iter);
438 node = lttng_ht_iter_get_node_ulong(&iter);
439 if (node != NULL) {
440 rcu_read_unlock();
441 /* Stream already exist. Ignore the insertion */
442 goto end;
443 }
444
04253271 445 lttng_ht_add_unique_ulong(consumer_data.stream_ht, &stream->node);
00e2e675
DG
446
447 /* Check and cleanup relayd */
448 relayd = consumer_find_relayd(stream->net_seq_idx);
449 if (relayd != NULL) {
b0b335c8 450 uatomic_inc(&relayd->refcount);
00e2e675 451 }
b0b335c8 452 rcu_read_unlock();
00e2e675
DG
453
454 /* Update consumer data */
3bd1e081
MD
455 consumer_data.stream_count++;
456 consumer_data.need_update = 1;
457
3bd1e081
MD
458end:
459 pthread_mutex_unlock(&consumer_data.lock);
702b1ea4 460
3bd1e081
MD
461 return ret;
462}
463
00e2e675 464/*
3f8e211f
DG
465 * Add relayd socket to global consumer data hashtable. RCU read side lock MUST
466 * be acquired before calling this.
00e2e675 467 */
d09e1200 468static int add_relayd(struct consumer_relayd_sock_pair *relayd)
00e2e675
DG
469{
470 int ret = 0;
471 struct lttng_ht_node_ulong *node;
472 struct lttng_ht_iter iter;
473
474 if (relayd == NULL) {
475 ret = -1;
476 goto end;
477 }
478
00e2e675
DG
479 lttng_ht_lookup(consumer_data.relayd_ht,
480 (void *)((unsigned long) relayd->net_seq_idx), &iter);
481 node = lttng_ht_iter_get_node_ulong(&iter);
482 if (node != NULL) {
00e2e675
DG
483 /* Relayd already exist. Ignore the insertion */
484 goto end;
485 }
486 lttng_ht_add_unique_ulong(consumer_data.relayd_ht, &relayd->node);
487
00e2e675
DG
488end:
489 return ret;
490}
491
492/*
493 * Allocate and return a consumer relayd socket.
494 */
495struct consumer_relayd_sock_pair *consumer_allocate_relayd_sock_pair(
496 int net_seq_idx)
497{
498 struct consumer_relayd_sock_pair *obj = NULL;
499
500 /* Negative net sequence index is a failure */
501 if (net_seq_idx < 0) {
502 goto error;
503 }
504
505 obj = zmalloc(sizeof(struct consumer_relayd_sock_pair));
506 if (obj == NULL) {
507 PERROR("zmalloc relayd sock");
508 goto error;
509 }
510
511 obj->net_seq_idx = net_seq_idx;
512 obj->refcount = 0;
173af62f 513 obj->destroy_flag = 0;
00e2e675
DG
514 lttng_ht_node_init_ulong(&obj->node, obj->net_seq_idx);
515 pthread_mutex_init(&obj->ctrl_sock_mutex, NULL);
516
517error:
518 return obj;
519}
520
521/*
522 * Find a relayd socket pair in the global consumer data.
523 *
524 * Return the object if found else NULL.
b0b335c8
MD
525 * RCU read-side lock must be held across this call and while using the
526 * returned object.
00e2e675
DG
527 */
528struct consumer_relayd_sock_pair *consumer_find_relayd(int key)
529{
530 struct lttng_ht_iter iter;
531 struct lttng_ht_node_ulong *node;
532 struct consumer_relayd_sock_pair *relayd = NULL;
533
534 /* Negative keys are lookup failures */
535 if (key < 0) {
536 goto error;
537 }
538
00e2e675
DG
539 lttng_ht_lookup(consumer_data.relayd_ht, (void *)((unsigned long) key),
540 &iter);
541 node = lttng_ht_iter_get_node_ulong(&iter);
542 if (node != NULL) {
543 relayd = caa_container_of(node, struct consumer_relayd_sock_pair, node);
544 }
545
00e2e675
DG
546error:
547 return relayd;
548}
549
550/*
551 * Handle stream for relayd transmission if the stream applies for network
552 * streaming where the net sequence index is set.
553 *
554 * Return destination file descriptor or negative value on error.
555 */
6197aea7 556static int write_relayd_stream_header(struct lttng_consumer_stream *stream,
1d4dfdef
DG
557 size_t data_size, unsigned long padding,
558 struct consumer_relayd_sock_pair *relayd)
00e2e675
DG
559{
560 int outfd = -1, ret;
00e2e675
DG
561 struct lttcomm_relayd_data_hdr data_hdr;
562
563 /* Safety net */
564 assert(stream);
6197aea7 565 assert(relayd);
00e2e675
DG
566
567 /* Reset data header */
568 memset(&data_hdr, 0, sizeof(data_hdr));
569
00e2e675
DG
570 if (stream->metadata_flag) {
571 /* Caller MUST acquire the relayd control socket lock */
572 ret = relayd_send_metadata(&relayd->control_sock, data_size);
573 if (ret < 0) {
574 goto error;
575 }
576
577 /* Metadata are always sent on the control socket. */
578 outfd = relayd->control_sock.fd;
579 } else {
580 /* Set header with stream information */
581 data_hdr.stream_id = htobe64(stream->relayd_stream_id);
582 data_hdr.data_size = htobe32(data_size);
1d4dfdef 583 data_hdr.padding_size = htobe32(padding);
173af62f 584 data_hdr.net_seq_num = htobe64(stream->next_net_seq_num++);
00e2e675
DG
585 /* Other fields are zeroed previously */
586
587 ret = relayd_send_data_hdr(&relayd->data_sock, &data_hdr,
588 sizeof(data_hdr));
589 if (ret < 0) {
590 goto error;
591 }
592
593 /* Set to go on data socket */
594 outfd = relayd->data_sock.fd;
595 }
596
597error:
598 return outfd;
599}
600
3bd1e081
MD
601/*
602 * Update a stream according to what we just received.
603 */
604void consumer_change_stream_state(int stream_key,
605 enum lttng_consumer_stream_state state)
606{
607 struct lttng_consumer_stream *stream;
608
609 pthread_mutex_lock(&consumer_data.lock);
610 stream = consumer_find_stream(stream_key);
611 if (stream) {
612 stream->state = state;
613 }
614 consumer_data.need_update = 1;
615 pthread_mutex_unlock(&consumer_data.lock);
616}
617
702b1ea4
MD
618static
619void consumer_free_channel(struct rcu_head *head)
620{
621 struct lttng_ht_node_ulong *node =
622 caa_container_of(head, struct lttng_ht_node_ulong, head);
623 struct lttng_consumer_channel *channel =
624 caa_container_of(node, struct lttng_consumer_channel, node);
625
626 free(channel);
627}
628
3bd1e081
MD
629/*
630 * Remove a channel from the global list protected by a mutex. This
631 * function is also responsible for freeing its data structures.
632 */
633void consumer_del_channel(struct lttng_consumer_channel *channel)
634{
635 int ret;
e4421fec 636 struct lttng_ht_iter iter;
3bd1e081
MD
637
638 pthread_mutex_lock(&consumer_data.lock);
639
640 switch (consumer_data.type) {
641 case LTTNG_CONSUMER_KERNEL:
642 break;
7753dea8
MD
643 case LTTNG_CONSUMER32_UST:
644 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
645 lttng_ustconsumer_del_channel(channel);
646 break;
647 default:
648 ERR("Unknown consumer_data type");
649 assert(0);
650 goto end;
651 }
652
6065ceec 653 rcu_read_lock();
04253271
MD
654 iter.iter.node = &channel->node.node;
655 ret = lttng_ht_del(consumer_data.channel_ht, &iter);
656 assert(!ret);
6065ceec
DG
657 rcu_read_unlock();
658
3bd1e081
MD
659 if (channel->mmap_base != NULL) {
660 ret = munmap(channel->mmap_base, channel->mmap_len);
661 if (ret != 0) {
662 perror("munmap");
663 }
664 }
b5c5fc29 665 if (channel->wait_fd >= 0 && !channel->wait_fd_is_copy) {
4c462e79
MD
666 ret = close(channel->wait_fd);
667 if (ret) {
668 PERROR("close");
669 }
3bd1e081 670 }
2c1dd183 671 if (channel->shm_fd >= 0 && channel->wait_fd != channel->shm_fd) {
4c462e79
MD
672 ret = close(channel->shm_fd);
673 if (ret) {
674 PERROR("close");
675 }
3bd1e081 676 }
702b1ea4
MD
677
678 call_rcu(&channel->node.head, consumer_free_channel);
3bd1e081
MD
679end:
680 pthread_mutex_unlock(&consumer_data.lock);
681}
682
683struct lttng_consumer_channel *consumer_allocate_channel(
684 int channel_key,
685 int shm_fd, int wait_fd,
686 uint64_t mmap_len,
c30aaa51
MD
687 uint64_t max_sb_size,
688 unsigned int nb_init_streams)
3bd1e081
MD
689{
690 struct lttng_consumer_channel *channel;
691 int ret;
692
276b26d1 693 channel = zmalloc(sizeof(*channel));
3bd1e081
MD
694 if (channel == NULL) {
695 perror("malloc struct lttng_consumer_channel");
696 goto end;
697 }
698 channel->key = channel_key;
699 channel->shm_fd = shm_fd;
700 channel->wait_fd = wait_fd;
701 channel->mmap_len = mmap_len;
702 channel->max_sb_size = max_sb_size;
703 channel->refcount = 0;
c30aaa51 704 channel->nb_init_streams = nb_init_streams;
e4421fec 705 lttng_ht_node_init_ulong(&channel->node, channel->key);
3bd1e081
MD
706
707 switch (consumer_data.type) {
708 case LTTNG_CONSUMER_KERNEL:
709 channel->mmap_base = NULL;
710 channel->mmap_len = 0;
711 break;
7753dea8
MD
712 case LTTNG_CONSUMER32_UST:
713 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
714 ret = lttng_ustconsumer_allocate_channel(channel);
715 if (ret) {
716 free(channel);
717 return NULL;
718 }
719 break;
720 default:
721 ERR("Unknown consumer_data type");
722 assert(0);
723 goto end;
724 }
725 DBG("Allocated channel (key %d, shm_fd %d, wait_fd %d, mmap_len %llu, max_sb_size %llu)",
00e2e675 726 channel->key, channel->shm_fd, channel->wait_fd,
3bd1e081
MD
727 (unsigned long long) channel->mmap_len,
728 (unsigned long long) channel->max_sb_size);
729end:
730 return channel;
731}
732
733/*
734 * Add a channel to the global list protected by a mutex.
735 */
736int consumer_add_channel(struct lttng_consumer_channel *channel)
737{
c77fc10a
DG
738 struct lttng_ht_node_ulong *node;
739 struct lttng_ht_iter iter;
740
3bd1e081 741 pthread_mutex_lock(&consumer_data.lock);
7ad0a0cb
MD
742 /* Steal channel identifier, for UST */
743 consumer_steal_channel_key(channel->key);
6065ceec 744 rcu_read_lock();
c77fc10a
DG
745
746 lttng_ht_lookup(consumer_data.channel_ht,
747 (void *)((unsigned long) channel->key), &iter);
748 node = lttng_ht_iter_get_node_ulong(&iter);
749 if (node != NULL) {
750 /* Channel already exist. Ignore the insertion */
751 goto end;
752 }
753
04253271 754 lttng_ht_add_unique_ulong(consumer_data.channel_ht, &channel->node);
c77fc10a
DG
755
756end:
6065ceec 757 rcu_read_unlock();
3bd1e081 758 pthread_mutex_unlock(&consumer_data.lock);
702b1ea4 759
7ad0a0cb 760 return 0;
3bd1e081
MD
761}
762
763/*
764 * Allocate the pollfd structure and the local view of the out fds to avoid
765 * doing a lookup in the linked list and concurrency issues when writing is
766 * needed. Called with consumer_data.lock held.
767 *
768 * Returns the number of fds in the structures.
769 */
770int consumer_update_poll_array(
771 struct lttng_consumer_local_data *ctx, struct pollfd **pollfd,
fb3a43a9 772 struct lttng_consumer_stream **local_stream)
3bd1e081 773{
3bd1e081 774 int i = 0;
e4421fec
DG
775 struct lttng_ht_iter iter;
776 struct lttng_consumer_stream *stream;
3bd1e081
MD
777
778 DBG("Updating poll fd array");
481d6c57 779 rcu_read_lock();
e4421fec
DG
780 cds_lfht_for_each_entry(consumer_data.stream_ht->ht, &iter.iter, stream,
781 node.node) {
782 if (stream->state != LTTNG_CONSUMER_ACTIVE_STREAM) {
3bd1e081
MD
783 continue;
784 }
e4421fec
DG
785 DBG("Active FD %d", stream->wait_fd);
786 (*pollfd)[i].fd = stream->wait_fd;
3bd1e081 787 (*pollfd)[i].events = POLLIN | POLLPRI;
e4421fec 788 local_stream[i] = stream;
3bd1e081
MD
789 i++;
790 }
481d6c57 791 rcu_read_unlock();
3bd1e081
MD
792
793 /*
794 * Insert the consumer_poll_pipe at the end of the array and don't
795 * increment i so nb_fd is the number of real FD.
796 */
797 (*pollfd)[i].fd = ctx->consumer_poll_pipe[0];
509bb1cf 798 (*pollfd)[i].events = POLLIN | POLLPRI;
3bd1e081
MD
799 return i;
800}
801
/*
 * Block in poll() on the two given descriptors, restarting the call when it
 * is interrupted by a signal.
 *
 * Returns -1 when poll() fails or when the first descriptor (the should_quit
 * pipe) is readable, meaning the caller should exit. Returns 0 otherwise,
 * i.e. when data is available on the command socket.
 */
int lttng_consumer_poll_socket(struct pollfd *consumer_sockpoll)
{
	int num_rdy;

	/* Restart interrupted system call. */
	do {
		num_rdy = poll(consumer_sockpoll, 2, -1);
	} while (num_rdy == -1 && errno == EINTR);

	if (num_rdy == -1) {
		perror("Poll error");
		return -1;
	}

	if (consumer_sockpoll[0].revents & (POLLIN | POLLPRI)) {
		DBG("consumer_should_quit wake up");
		return -1;
	}

	return 0;
}
831
832/*
833 * Set the error socket.
834 */
835void lttng_consumer_set_error_sock(
836 struct lttng_consumer_local_data *ctx, int sock)
837{
838 ctx->consumer_error_socket = sock;
839}
840
841/*
842 * Set the command socket path.
843 */
3bd1e081
MD
844void lttng_consumer_set_command_sock_path(
845 struct lttng_consumer_local_data *ctx, char *sock)
846{
847 ctx->consumer_command_sock_path = sock;
848}
849
850/*
851 * Send return code to the session daemon.
852 * If the socket is not defined, we return 0, it is not a fatal error
853 */
854int lttng_consumer_send_error(
855 struct lttng_consumer_local_data *ctx, int cmd)
856{
857 if (ctx->consumer_error_socket > 0) {
858 return lttcomm_send_unix_sock(ctx->consumer_error_socket, &cmd,
859 sizeof(enum lttcomm_sessiond_command));
860 }
861
862 return 0;
863}
864
865/*
866 * Close all the tracefiles and stream fds, should be called when all instances
867 * are destroyed.
868 */
869void lttng_consumer_cleanup(void)
870{
e4421fec 871 struct lttng_ht_iter iter;
6065ceec
DG
872 struct lttng_ht_node_ulong *node;
873
874 rcu_read_lock();
3bd1e081
MD
875
876 /*
6065ceec
DG
877 * close all outfd. Called when there are no more threads running (after
878 * joining on the threads), no need to protect list iteration with mutex.
3bd1e081 879 */
6065ceec
DG
880 cds_lfht_for_each_entry(consumer_data.stream_ht->ht, &iter.iter, node,
881 node) {
702b1ea4
MD
882 struct lttng_consumer_stream *stream =
883 caa_container_of(node, struct lttng_consumer_stream, node);
884 consumer_del_stream(stream);
3bd1e081 885 }
e4421fec 886
6065ceec
DG
887 cds_lfht_for_each_entry(consumer_data.channel_ht->ht, &iter.iter, node,
888 node) {
702b1ea4
MD
889 struct lttng_consumer_channel *channel =
890 caa_container_of(node, struct lttng_consumer_channel, node);
891 consumer_del_channel(channel);
3bd1e081 892 }
6065ceec
DG
893
894 rcu_read_unlock();
d6ce1df2
MD
895
896 lttng_ht_destroy(consumer_data.stream_ht);
897 lttng_ht_destroy(consumer_data.channel_ht);
3bd1e081
MD
898}
899
900/*
901 * Called from signal handler.
902 */
903void lttng_consumer_should_exit(struct lttng_consumer_local_data *ctx)
904{
905 int ret;
906 consumer_quit = 1;
6f94560a
MD
907 do {
908 ret = write(ctx->consumer_should_quit[1], "4", 1);
909 } while (ret < 0 && errno == EINTR);
3bd1e081
MD
910 if (ret < 0) {
911 perror("write consumer quit");
912 }
913}
914
00e2e675
DG
915void lttng_consumer_sync_trace_file(struct lttng_consumer_stream *stream,
916 off_t orig_offset)
3bd1e081
MD
917{
918 int outfd = stream->out_fd;
919
920 /*
921 * This does a blocking write-and-wait on any page that belongs to the
922 * subbuffer prior to the one we just wrote.
923 * Don't care about error values, as these are just hints and ways to
924 * limit the amount of page cache used.
925 */
926 if (orig_offset < stream->chan->max_sb_size) {
927 return;
928 }
b9182dd9 929 lttng_sync_file_range(outfd, orig_offset - stream->chan->max_sb_size,
3bd1e081
MD
930 stream->chan->max_sb_size,
931 SYNC_FILE_RANGE_WAIT_BEFORE
932 | SYNC_FILE_RANGE_WRITE
933 | SYNC_FILE_RANGE_WAIT_AFTER);
934 /*
935 * Give hints to the kernel about how we access the file:
936 * POSIX_FADV_DONTNEED : we won't re-access data in a near future after
937 * we write it.
938 *
939 * We need to call fadvise again after the file grows because the
940 * kernel does not seem to apply fadvise to non-existing parts of the
941 * file.
942 *
943 * Call fadvise _after_ having waited for the page writeback to
944 * complete because the dirty page writeback semantic is not well
945 * defined. So it can be expected to lead to lower throughput in
946 * streaming.
947 */
948 posix_fadvise(outfd, orig_offset - stream->chan->max_sb_size,
949 stream->chan->max_sb_size, POSIX_FADV_DONTNEED);
950}
951
952/*
953 * Initialise the necessary environnement :
954 * - create a new context
955 * - create the poll_pipe
956 * - create the should_quit pipe (for signal handler)
957 * - create the thread pipe (for splice)
958 *
959 * Takes a function pointer as argument, this function is called when data is
960 * available on a buffer. This function is responsible to do the
961 * kernctl_get_next_subbuf, read the data with mmap or splice depending on the
962 * buffer configuration and then kernctl_put_next_subbuf at the end.
963 *
964 * Returns a pointer to the new context or NULL on error.
965 */
966struct lttng_consumer_local_data *lttng_consumer_create(
967 enum lttng_consumer_type type,
4078b776 968 ssize_t (*buffer_ready)(struct lttng_consumer_stream *stream,
d41f73b7 969 struct lttng_consumer_local_data *ctx),
3bd1e081
MD
970 int (*recv_channel)(struct lttng_consumer_channel *channel),
971 int (*recv_stream)(struct lttng_consumer_stream *stream),
972 int (*update_stream)(int stream_key, uint32_t state))
973{
974 int ret, i;
975 struct lttng_consumer_local_data *ctx;
976
977 assert(consumer_data.type == LTTNG_CONSUMER_UNKNOWN ||
978 consumer_data.type == type);
979 consumer_data.type = type;
980
effcf122 981 ctx = zmalloc(sizeof(struct lttng_consumer_local_data));
3bd1e081
MD
982 if (ctx == NULL) {
983 perror("allocating context");
984 goto error;
985 }
986
987 ctx->consumer_error_socket = -1;
988 /* assign the callbacks */
989 ctx->on_buffer_ready = buffer_ready;
990 ctx->on_recv_channel = recv_channel;
991 ctx->on_recv_stream = recv_stream;
992 ctx->on_update_stream = update_stream;
993
994 ret = pipe(ctx->consumer_poll_pipe);
995 if (ret < 0) {
996 perror("Error creating poll pipe");
997 goto error_poll_pipe;
998 }
999
04fdd819
MD
1000 /* set read end of the pipe to non-blocking */
1001 ret = fcntl(ctx->consumer_poll_pipe[0], F_SETFL, O_NONBLOCK);
1002 if (ret < 0) {
1003 perror("fcntl O_NONBLOCK");
1004 goto error_poll_fcntl;
1005 }
1006
1007 /* set write end of the pipe to non-blocking */
1008 ret = fcntl(ctx->consumer_poll_pipe[1], F_SETFL, O_NONBLOCK);
1009 if (ret < 0) {
1010 perror("fcntl O_NONBLOCK");
1011 goto error_poll_fcntl;
1012 }
1013
3bd1e081
MD
1014 ret = pipe(ctx->consumer_should_quit);
1015 if (ret < 0) {
1016 perror("Error creating recv pipe");
1017 goto error_quit_pipe;
1018 }
1019
1020 ret = pipe(ctx->consumer_thread_pipe);
1021 if (ret < 0) {
1022 perror("Error creating thread pipe");
1023 goto error_thread_pipe;
1024 }
1025
fb3a43a9
DG
1026 ret = utils_create_pipe(ctx->consumer_metadata_pipe);
1027 if (ret < 0) {
1028 goto error_metadata_pipe;
1029 }
3bd1e081 1030
fb3a43a9
DG
1031 ret = utils_create_pipe(ctx->consumer_splice_metadata_pipe);
1032 if (ret < 0) {
1033 goto error_splice_pipe;
1034 }
1035
1036 return ctx;
3bd1e081 1037
fb3a43a9
DG
1038error_splice_pipe:
1039 utils_close_pipe(ctx->consumer_metadata_pipe);
1040error_metadata_pipe:
1041 utils_close_pipe(ctx->consumer_thread_pipe);
3bd1e081
MD
1042error_thread_pipe:
1043 for (i = 0; i < 2; i++) {
1044 int err;
1045
1046 err = close(ctx->consumer_should_quit[i]);
4c462e79
MD
1047 if (err) {
1048 PERROR("close");
1049 }
3bd1e081 1050 }
04fdd819 1051error_poll_fcntl:
3bd1e081
MD
1052error_quit_pipe:
1053 for (i = 0; i < 2; i++) {
1054 int err;
1055
1056 err = close(ctx->consumer_poll_pipe[i]);
4c462e79
MD
1057 if (err) {
1058 PERROR("close");
1059 }
3bd1e081
MD
1060 }
1061error_poll_pipe:
1062 free(ctx);
1063error:
1064 return NULL;
1065}
1066
1067/*
1068 * Close all fds associated with the instance and free the context.
1069 */
1070void lttng_consumer_destroy(struct lttng_consumer_local_data *ctx)
1071{
4c462e79
MD
1072 int ret;
1073
1074 ret = close(ctx->consumer_error_socket);
1075 if (ret) {
1076 PERROR("close");
1077 }
1078 ret = close(ctx->consumer_thread_pipe[0]);
1079 if (ret) {
1080 PERROR("close");
1081 }
1082 ret = close(ctx->consumer_thread_pipe[1]);
1083 if (ret) {
1084 PERROR("close");
1085 }
1086 ret = close(ctx->consumer_poll_pipe[0]);
1087 if (ret) {
1088 PERROR("close");
1089 }
1090 ret = close(ctx->consumer_poll_pipe[1]);
1091 if (ret) {
1092 PERROR("close");
1093 }
1094 ret = close(ctx->consumer_should_quit[0]);
1095 if (ret) {
1096 PERROR("close");
1097 }
1098 ret = close(ctx->consumer_should_quit[1]);
1099 if (ret) {
1100 PERROR("close");
1101 }
fb3a43a9
DG
1102 utils_close_pipe(ctx->consumer_splice_metadata_pipe);
1103
3bd1e081
MD
1104 unlink(ctx->consumer_command_sock_path);
1105 free(ctx);
1106}
1107
6197aea7
DG
1108/*
1109 * Write the metadata stream id on the specified file descriptor.
1110 */
1111static int write_relayd_metadata_id(int fd,
1112 struct lttng_consumer_stream *stream,
1d4dfdef
DG
1113 struct consumer_relayd_sock_pair *relayd,
1114 unsigned long padding)
6197aea7
DG
1115{
1116 int ret;
1d4dfdef 1117 struct lttcomm_relayd_metadata_payload hdr;
6197aea7 1118
1d4dfdef
DG
1119 hdr.stream_id = htobe64(stream->relayd_stream_id);
1120 hdr.padding_size = htobe32(padding);
6197aea7 1121 do {
1d4dfdef 1122 ret = write(fd, (void *) &hdr, sizeof(hdr));
6197aea7
DG
1123 } while (ret < 0 && errno == EINTR);
1124 if (ret < 0) {
1125 PERROR("write metadata stream id");
1126 goto end;
1127 }
1d4dfdef
DG
1128 DBG("Metadata stream id %" PRIu64 " with padding %lu written before data",
1129 stream->relayd_stream_id, padding);
6197aea7
DG
1130
1131end:
1132 return ret;
1133}
1134
3bd1e081 1135/*
09e26845
DG
1136 * Mmap the ring buffer, read it and write the data to the tracefile. This is a
1137 * core function for writing trace buffers to either the local filesystem or
1138 * the network.
1139 *
1140 * Careful review MUST be put if any changes occur!
3bd1e081
MD
1141 *
1142 * Returns the number of bytes written
1143 */
4078b776 1144ssize_t lttng_consumer_on_read_subbuffer_mmap(
3bd1e081 1145 struct lttng_consumer_local_data *ctx,
1d4dfdef
DG
1146 struct lttng_consumer_stream *stream, unsigned long len,
1147 unsigned long padding)
3bd1e081 1148{
f02e1e8a
DG
1149 unsigned long mmap_offset;
1150 ssize_t ret = 0, written = 0;
1151 off_t orig_offset = stream->out_fd_offset;
1152 /* Default is on the disk */
1153 int outfd = stream->out_fd;
f02e1e8a
DG
1154 struct consumer_relayd_sock_pair *relayd = NULL;
1155
1156 /* RCU lock for the relayd pointer */
1157 rcu_read_lock();
1158
1159 /* Flag that the current stream if set for network streaming. */
1160 if (stream->net_seq_idx != -1) {
1161 relayd = consumer_find_relayd(stream->net_seq_idx);
1162 if (relayd == NULL) {
1163 goto end;
1164 }
1165 }
1166
1167 /* get the offset inside the fd to mmap */
3bd1e081
MD
1168 switch (consumer_data.type) {
1169 case LTTNG_CONSUMER_KERNEL:
f02e1e8a
DG
1170 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
1171 break;
7753dea8
MD
1172 case LTTNG_CONSUMER32_UST:
1173 case LTTNG_CONSUMER64_UST:
f02e1e8a
DG
1174 ret = lttng_ustctl_get_mmap_read_offset(stream->chan->handle,
1175 stream->buf, &mmap_offset);
1176 break;
3bd1e081
MD
1177 default:
1178 ERR("Unknown consumer_data type");
1179 assert(0);
1180 }
f02e1e8a
DG
1181 if (ret != 0) {
1182 errno = -ret;
1183 PERROR("tracer ctl get_mmap_read_offset");
1184 written = ret;
1185 goto end;
1186 }
b9182dd9 1187
f02e1e8a
DG
1188 /* Handle stream on the relayd if the output is on the network */
1189 if (relayd) {
1190 unsigned long netlen = len;
1191
1192 /*
1193 * Lock the control socket for the complete duration of the function
1194 * since from this point on we will use the socket.
1195 */
1196 if (stream->metadata_flag) {
1197 /* Metadata requires the control socket. */
1198 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
1d4dfdef 1199 netlen += sizeof(struct lttcomm_relayd_metadata_payload);
f02e1e8a
DG
1200 }
1201
1d4dfdef 1202 ret = write_relayd_stream_header(stream, netlen, padding, relayd);
f02e1e8a
DG
1203 if (ret >= 0) {
1204 /* Use the returned socket. */
1205 outfd = ret;
1206
1207 /* Write metadata stream id before payload */
1208 if (stream->metadata_flag) {
1d4dfdef 1209 ret = write_relayd_metadata_id(outfd, stream, relayd, padding);
f02e1e8a 1210 if (ret < 0) {
f02e1e8a
DG
1211 written = ret;
1212 goto end;
1213 }
f02e1e8a
DG
1214 }
1215 }
1216 /* Else, use the default set before which is the filesystem. */
1d4dfdef
DG
1217 } else {
1218 /* No streaming, we have to set the len with the full padding */
1219 len += padding;
f02e1e8a
DG
1220 }
1221
1222 while (len > 0) {
1223 do {
1224 ret = write(outfd, stream->mmap_base + mmap_offset, len);
1225 } while (ret < 0 && errno == EINTR);
1d4dfdef 1226 DBG("Consumer mmap write() ret %zd (len %lu)", ret, len);
f02e1e8a
DG
1227 if (ret < 0) {
1228 PERROR("Error in file write");
1229 if (written == 0) {
1230 written = ret;
1231 }
1232 goto end;
1233 } else if (ret > len) {
77c7c900 1234 PERROR("Error in file write (ret %zd > len %lu)", ret, len);
f02e1e8a
DG
1235 written += ret;
1236 goto end;
1237 } else {
1238 len -= ret;
1239 mmap_offset += ret;
1240 }
f02e1e8a
DG
1241
1242 /* This call is useless on a socket so better save a syscall. */
1243 if (!relayd) {
1244 /* This won't block, but will start writeout asynchronously */
1245 lttng_sync_file_range(outfd, stream->out_fd_offset, ret,
1246 SYNC_FILE_RANGE_WRITE);
1247 stream->out_fd_offset += ret;
1248 }
1249 written += ret;
1250 }
1251 lttng_consumer_sync_trace_file(stream, orig_offset);
1252
1253end:
1254 /* Unlock only if ctrl socket used */
1255 if (relayd && stream->metadata_flag) {
1256 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
1257 }
1258
1259 rcu_read_unlock();
1260 return written;
3bd1e081
MD
1261}
1262
1263/*
1264 * Splice the data from the ring buffer to the tracefile.
1265 *
1266 * Returns the number of bytes spliced.
1267 */
4078b776 1268ssize_t lttng_consumer_on_read_subbuffer_splice(
3bd1e081 1269 struct lttng_consumer_local_data *ctx,
1d4dfdef
DG
1270 struct lttng_consumer_stream *stream, unsigned long len,
1271 unsigned long padding)
3bd1e081 1272{
f02e1e8a
DG
1273 ssize_t ret = 0, written = 0, ret_splice = 0;
1274 loff_t offset = 0;
1275 off_t orig_offset = stream->out_fd_offset;
1276 int fd = stream->wait_fd;
1277 /* Default is on the disk */
1278 int outfd = stream->out_fd;
f02e1e8a 1279 struct consumer_relayd_sock_pair *relayd = NULL;
fb3a43a9 1280 int *splice_pipe;
f02e1e8a 1281
3bd1e081
MD
1282 switch (consumer_data.type) {
1283 case LTTNG_CONSUMER_KERNEL:
f02e1e8a 1284 break;
7753dea8
MD
1285 case LTTNG_CONSUMER32_UST:
1286 case LTTNG_CONSUMER64_UST:
f02e1e8a 1287 /* Not supported for user space tracing */
3bd1e081
MD
1288 return -ENOSYS;
1289 default:
1290 ERR("Unknown consumer_data type");
1291 assert(0);
3bd1e081
MD
1292 }
1293
f02e1e8a
DG
1294 /* RCU lock for the relayd pointer */
1295 rcu_read_lock();
1296
1297 /* Flag that the current stream if set for network streaming. */
1298 if (stream->net_seq_idx != -1) {
1299 relayd = consumer_find_relayd(stream->net_seq_idx);
1300 if (relayd == NULL) {
1301 goto end;
1302 }
1303 }
1304
fb3a43a9
DG
1305 /*
1306 * Choose right pipe for splice. Metadata and trace data are handled by
1307 * different threads hence the use of two pipes in order not to race or
1308 * corrupt the written data.
1309 */
1310 if (stream->metadata_flag) {
1311 splice_pipe = ctx->consumer_splice_metadata_pipe;
1312 } else {
1313 splice_pipe = ctx->consumer_thread_pipe;
1314 }
1315
f02e1e8a 1316 /* Write metadata stream id before payload */
1d4dfdef
DG
1317 if (relayd) {
1318 int total_len = len;
f02e1e8a 1319
1d4dfdef
DG
1320 if (stream->metadata_flag) {
1321 /*
1322 * Lock the control socket for the complete duration of the function
1323 * since from this point on we will use the socket.
1324 */
1325 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
1326
1327 ret = write_relayd_metadata_id(splice_pipe[1], stream, relayd,
1328 padding);
1329 if (ret < 0) {
1330 written = ret;
1331 goto end;
1332 }
1333
1334 total_len += sizeof(struct lttcomm_relayd_metadata_payload);
1335 }
1336
1337 ret = write_relayd_stream_header(stream, total_len, padding, relayd);
1338 if (ret >= 0) {
1339 /* Use the returned socket. */
1340 outfd = ret;
1341 } else {
1342 ERR("Remote relayd disconnected. Stopping");
f02e1e8a
DG
1343 goto end;
1344 }
1d4dfdef
DG
1345 } else {
1346 /* No streaming, we have to set the len with the full padding */
1347 len += padding;
f02e1e8a
DG
1348 }
1349
1350 while (len > 0) {
1d4dfdef
DG
1351 DBG("splice chan to pipe offset %lu of len %lu (fd : %d, pipe: %d)",
1352 (unsigned long)offset, len, fd, splice_pipe[1]);
fb3a43a9 1353 ret_splice = splice(fd, &offset, splice_pipe[1], NULL, len,
f02e1e8a
DG
1354 SPLICE_F_MOVE | SPLICE_F_MORE);
1355 DBG("splice chan to pipe, ret %zd", ret_splice);
1356 if (ret_splice < 0) {
1357 PERROR("Error in relay splice");
1358 if (written == 0) {
1359 written = ret_splice;
1360 }
1361 ret = errno;
1362 goto splice_error;
1363 }
1364
1365 /* Handle stream on the relayd if the output is on the network */
1366 if (relayd) {
1367 if (stream->metadata_flag) {
1d4dfdef
DG
1368 size_t metadata_payload_size =
1369 sizeof(struct lttcomm_relayd_metadata_payload);
1370
f02e1e8a 1371 /* Update counter to fit the spliced data */
1d4dfdef
DG
1372 ret_splice += metadata_payload_size;
1373 len += metadata_payload_size;
f02e1e8a
DG
1374 /*
1375 * We do this so the return value can match the len passed as
1376 * argument to this function.
1377 */
1d4dfdef 1378 written -= metadata_payload_size;
f02e1e8a
DG
1379 }
1380 }
1381
1382 /* Splice data out */
fb3a43a9 1383 ret_splice = splice(splice_pipe[0], NULL, outfd, NULL,
f02e1e8a 1384 ret_splice, SPLICE_F_MOVE | SPLICE_F_MORE);
1d4dfdef 1385 DBG("Consumer splice pipe to file, ret %zd", ret_splice);
f02e1e8a
DG
1386 if (ret_splice < 0) {
1387 PERROR("Error in file splice");
1388 if (written == 0) {
1389 written = ret_splice;
1390 }
1391 ret = errno;
1392 goto splice_error;
1393 } else if (ret_splice > len) {
1394 errno = EINVAL;
1395 PERROR("Wrote more data than requested %zd (len: %lu)",
1396 ret_splice, len);
1397 written += ret_splice;
1398 ret = errno;
1399 goto splice_error;
1400 }
1401 len -= ret_splice;
1402
1403 /* This call is useless on a socket so better save a syscall. */
1404 if (!relayd) {
1405 /* This won't block, but will start writeout asynchronously */
1406 lttng_sync_file_range(outfd, stream->out_fd_offset, ret_splice,
1407 SYNC_FILE_RANGE_WRITE);
1408 stream->out_fd_offset += ret_splice;
1409 }
1410 written += ret_splice;
1411 }
1412 lttng_consumer_sync_trace_file(stream, orig_offset);
1413
1414 ret = ret_splice;
1415
1416 goto end;
1417
1418splice_error:
1419 /* send the appropriate error description to sessiond */
1420 switch (ret) {
1421 case EBADF:
f73fabfd 1422 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_SPLICE_EBADF);
f02e1e8a
DG
1423 break;
1424 case EINVAL:
f73fabfd 1425 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_SPLICE_EINVAL);
f02e1e8a
DG
1426 break;
1427 case ENOMEM:
f73fabfd 1428 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_SPLICE_ENOMEM);
f02e1e8a
DG
1429 break;
1430 case ESPIPE:
f73fabfd 1431 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_SPLICE_ESPIPE);
f02e1e8a
DG
1432 break;
1433 }
1434
1435end:
1436 if (relayd && stream->metadata_flag) {
1437 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
1438 }
1439
1440 rcu_read_unlock();
1441 return written;
3bd1e081
MD
1442}
1443
1444/*
1445 * Take a snapshot for a specific fd
1446 *
1447 * Returns 0 on success, < 0 on error
1448 */
1449int lttng_consumer_take_snapshot(struct lttng_consumer_local_data *ctx,
1450 struct lttng_consumer_stream *stream)
1451{
1452 switch (consumer_data.type) {
1453 case LTTNG_CONSUMER_KERNEL:
1454 return lttng_kconsumer_take_snapshot(ctx, stream);
7753dea8
MD
1455 case LTTNG_CONSUMER32_UST:
1456 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
1457 return lttng_ustconsumer_take_snapshot(ctx, stream);
1458 default:
1459 ERR("Unknown consumer_data type");
1460 assert(0);
1461 return -ENOSYS;
1462 }
1463
1464}
1465
1466/*
1467 * Get the produced position
1468 *
1469 * Returns 0 on success, < 0 on error
1470 */
1471int lttng_consumer_get_produced_snapshot(
1472 struct lttng_consumer_local_data *ctx,
1473 struct lttng_consumer_stream *stream,
1474 unsigned long *pos)
1475{
1476 switch (consumer_data.type) {
1477 case LTTNG_CONSUMER_KERNEL:
1478 return lttng_kconsumer_get_produced_snapshot(ctx, stream, pos);
7753dea8
MD
1479 case LTTNG_CONSUMER32_UST:
1480 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
1481 return lttng_ustconsumer_get_produced_snapshot(ctx, stream, pos);
1482 default:
1483 ERR("Unknown consumer_data type");
1484 assert(0);
1485 return -ENOSYS;
1486 }
1487}
1488
1489int lttng_consumer_recv_cmd(struct lttng_consumer_local_data *ctx,
1490 int sock, struct pollfd *consumer_sockpoll)
1491{
1492 switch (consumer_data.type) {
1493 case LTTNG_CONSUMER_KERNEL:
1494 return lttng_kconsumer_recv_cmd(ctx, sock, consumer_sockpoll);
7753dea8
MD
1495 case LTTNG_CONSUMER32_UST:
1496 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
1497 return lttng_ustconsumer_recv_cmd(ctx, sock, consumer_sockpoll);
1498 default:
1499 ERR("Unknown consumer_data type");
1500 assert(0);
1501 return -ENOSYS;
1502 }
1503}
1504
fb3a43a9
DG
1505/*
1506 * Iterate over all stream element of the hashtable and free them. This is race
1507 * free since the hashtable received MUST be in a race free synchronization
1508 * state. It's the caller responsability to make sure of that.
1509 */
1510static void destroy_stream_ht(struct lttng_ht *ht)
1511{
1512 int ret;
1513 struct lttng_ht_iter iter;
1514 struct lttng_consumer_stream *stream;
1515
1516 if (ht == NULL) {
1517 return;
1518 }
1519
d09e1200 1520 rcu_read_lock();
fb3a43a9
DG
1521 cds_lfht_for_each_entry(ht->ht, &iter.iter, stream, node.node) {
1522 ret = lttng_ht_del(ht, &iter);
1523 assert(!ret);
1524
1525 free(stream);
1526 }
d09e1200 1527 rcu_read_unlock();
fb3a43a9
DG
1528
1529 lttng_ht_destroy(ht);
1530}
1531
1532/*
1533 * Clean up a metadata stream and free its memory.
1534 */
1535static void consumer_del_metadata_stream(struct lttng_consumer_stream *stream)
1536{
1537 int ret;
fb3a43a9
DG
1538 struct consumer_relayd_sock_pair *relayd;
1539
1540 assert(stream);
1541 /*
1542 * This call should NEVER receive regular stream. It must always be
1543 * metadata stream and this is crucial for data structure synchronization.
1544 */
1545 assert(stream->metadata_flag);
1546
1547 pthread_mutex_lock(&consumer_data.lock);
1548 switch (consumer_data.type) {
1549 case LTTNG_CONSUMER_KERNEL:
1550 if (stream->mmap_base != NULL) {
1551 ret = munmap(stream->mmap_base, stream->mmap_len);
1552 if (ret != 0) {
1553 PERROR("munmap metadata stream");
1554 }
1555 }
1556 break;
1557 case LTTNG_CONSUMER32_UST:
1558 case LTTNG_CONSUMER64_UST:
1559 lttng_ustconsumer_del_stream(stream);
1560 break;
1561 default:
1562 ERR("Unknown consumer_data type");
1563 assert(0);
1564 }
1565 pthread_mutex_unlock(&consumer_data.lock);
1566
1567 if (stream->out_fd >= 0) {
1568 ret = close(stream->out_fd);
1569 if (ret) {
1570 PERROR("close");
1571 }
1572 }
1573
1574 if (stream->wait_fd >= 0 && !stream->wait_fd_is_copy) {
1575 ret = close(stream->wait_fd);
1576 if (ret) {
1577 PERROR("close");
1578 }
1579 }
1580
1581 if (stream->shm_fd >= 0 && stream->wait_fd != stream->shm_fd) {
1582 ret = close(stream->shm_fd);
1583 if (ret) {
1584 PERROR("close");
1585 }
1586 }
1587
1588 /* Check and cleanup relayd */
1589 rcu_read_lock();
1590 relayd = consumer_find_relayd(stream->net_seq_idx);
1591 if (relayd != NULL) {
1592 uatomic_dec(&relayd->refcount);
1593 assert(uatomic_read(&relayd->refcount) >= 0);
1594
1595 /* Closing streams requires to lock the control socket. */
1596 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
1597 ret = relayd_send_close_stream(&relayd->control_sock,
1598 stream->relayd_stream_id, stream->next_net_seq_num - 1);
1599 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
1600 if (ret < 0) {
1601 DBG("Unable to close stream on the relayd. Continuing");
1602 /*
1603 * Continue here. There is nothing we can do for the relayd.
1604 * Chances are that the relayd has closed the socket so we just
1605 * continue cleaning up.
1606 */
1607 }
1608
1609 /* Both conditions are met, we destroy the relayd. */
1610 if (uatomic_read(&relayd->refcount) == 0 &&
1611 uatomic_read(&relayd->destroy_flag)) {
d09e1200 1612 destroy_relayd(relayd);
fb3a43a9
DG
1613 }
1614 }
1615 rcu_read_unlock();
1616
1617 /* Atomically decrement channel refcount since other threads can use it. */
1618 uatomic_dec(&stream->chan->refcount);
c30aaa51
MD
1619 if (!uatomic_read(&stream->chan->refcount)
1620 && !uatomic_read(&stream->chan->nb_init_streams)) {
1621 /* Go for channel deletion! */
1622 consumer_del_channel(stream->chan);
fb3a43a9
DG
1623 }
1624
1625 free(stream);
1626}
1627
1628/*
1629 * Action done with the metadata stream when adding it to the consumer internal
1630 * data structures to handle it.
1631 */
1632static void consumer_add_metadata_stream(struct lttng_consumer_stream *stream)
1633{
1634 struct consumer_relayd_sock_pair *relayd;
1635
1636 /* Find relayd and, if one is found, increment refcount. */
1637 rcu_read_lock();
1638 relayd = consumer_find_relayd(stream->net_seq_idx);
1639 if (relayd != NULL) {
1640 uatomic_inc(&relayd->refcount);
1641 }
1642 rcu_read_unlock();
1643}
1644
1645/*
1646 * Thread polls on metadata file descriptor and write them on disk or on the
1647 * network.
1648 */
1649void *lttng_consumer_thread_poll_metadata(void *data)
1650{
1651 int ret, i, pollfd;
1652 uint32_t revents, nb_fd;
1653 struct lttng_consumer_stream *stream;
1654 struct lttng_ht_iter iter;
1655 struct lttng_ht_node_ulong *node;
1656 struct lttng_ht *metadata_ht = NULL;
1657 struct lttng_poll_event events;
1658 struct lttng_consumer_local_data *ctx = data;
1659 ssize_t len;
1660
1661 rcu_register_thread();
1662
1663 DBG("Thread metadata poll started");
1664
1665 metadata_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
1666 if (metadata_ht == NULL) {
1667 goto end;
1668 }
1669
1670 /* Size is set to 1 for the consumer_metadata pipe */
1671 ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
1672 if (ret < 0) {
1673 ERR("Poll set creation failed");
1674 goto end;
1675 }
1676
1677 ret = lttng_poll_add(&events, ctx->consumer_metadata_pipe[0], LPOLLIN);
1678 if (ret < 0) {
1679 goto end;
1680 }
1681
1682 /* Main loop */
1683 DBG("Metadata main loop started");
1684
1685 while (1) {
1686 lttng_poll_reset(&events);
1687
1688 nb_fd = LTTNG_POLL_GETNB(&events);
1689
1690 /* Only the metadata pipe is set */
1691 if (nb_fd == 0 && consumer_quit == 1) {
1692 goto end;
1693 }
1694
1695restart:
1696 DBG("Metadata poll wait with %d fd(s)", nb_fd);
1697 ret = lttng_poll_wait(&events, -1);
1698 DBG("Metadata event catched in thread");
1699 if (ret < 0) {
1700 if (errno == EINTR) {
1701 goto restart;
1702 }
1703 goto error;
1704 }
1705
1706 for (i = 0; i < nb_fd; i++) {
1707 revents = LTTNG_POLL_GETEV(&events, i);
1708 pollfd = LTTNG_POLL_GETFD(&events, i);
1709
1710 /* Check the metadata pipe for incoming metadata. */
1711 if (pollfd == ctx->consumer_metadata_pipe[0]) {
4adabd61 1712 if (revents & (LPOLLERR | LPOLLHUP )) {
fb3a43a9
DG
1713 DBG("Metadata thread pipe hung up");
1714 /*
1715 * Remove the pipe from the poll set and continue the loop
1716 * since their might be data to consume.
1717 */
1718 lttng_poll_del(&events, ctx->consumer_metadata_pipe[0]);
1719 close(ctx->consumer_metadata_pipe[0]);
1720 continue;
1721 } else if (revents & LPOLLIN) {
1722 stream = zmalloc(sizeof(struct lttng_consumer_stream));
1723 if (stream == NULL) {
1724 PERROR("zmalloc metadata consumer stream");
1725 goto error;
1726 }
1727
1728 do {
1729 /* Get the stream and add it to the local hash table */
1730 ret = read(pollfd, stream,
1731 sizeof(struct lttng_consumer_stream));
1732 } while (ret < 0 && errno == EINTR);
1733 if (ret < 0 || ret < sizeof(struct lttng_consumer_stream)) {
1734 PERROR("read metadata stream");
1735 free(stream);
1736 /*
1737 * Let's continue here and hope we can still work
1738 * without stopping the consumer. XXX: Should we?
1739 */
1740 continue;
1741 }
1742
1743 DBG("Adding metadata stream %d to poll set",
1744 stream->wait_fd);
1745
d09e1200 1746 rcu_read_lock();
fb3a43a9
DG
1747 /* The node should be init at this point */
1748 lttng_ht_add_unique_ulong(metadata_ht,
1749 &stream->waitfd_node);
d09e1200 1750 rcu_read_unlock();
fb3a43a9
DG
1751
1752 /* Add metadata stream to the global poll events list */
1753 lttng_poll_add(&events, stream->wait_fd,
1754 LPOLLIN | LPOLLPRI);
1755
1756 consumer_add_metadata_stream(stream);
1757 }
1758
1759 /* Metadata pipe handled. Continue handling the others */
1760 continue;
1761 }
1762
1763 /* From here, the event is a metadata wait fd */
1764
d09e1200 1765 rcu_read_lock();
fb3a43a9
DG
1766 lttng_ht_lookup(metadata_ht, (void *)((unsigned long) pollfd),
1767 &iter);
1768 node = lttng_ht_iter_get_node_ulong(&iter);
1769 if (node == NULL) {
1770 /* FD not found, continue loop */
d09e1200 1771 rcu_read_unlock();
fb3a43a9
DG
1772 continue;
1773 }
1774
1775 stream = caa_container_of(node, struct lttng_consumer_stream,
1776 waitfd_node);
1777
1778 /* Get the data out of the metadata file descriptor */
1779 if (revents & (LPOLLIN | LPOLLPRI)) {
1780 DBG("Metadata available on fd %d", pollfd);
1781 assert(stream->wait_fd == pollfd);
1782
1783 len = ctx->on_buffer_ready(stream, ctx);
1784 /* It's ok to have an unavailable sub-buffer */
1785 if (len < 0 && len != -EAGAIN) {
d09e1200 1786 rcu_read_unlock();
fb3a43a9
DG
1787 goto end;
1788 } else if (len > 0) {
1789 stream->data_read = 1;
1790 }
1791 }
1792
1793 /*
1794 * Remove the stream from the hash table since there is no data
1795 * left on the fd because we previously did a read on the buffer.
1796 */
4adabd61 1797 if (revents & (LPOLLERR | LPOLLHUP)) {
fb3a43a9
DG
1798 DBG("Metadata fd %d is hup|err|nval.", pollfd);
1799 if (!stream->hangup_flush_done
1800 && (consumer_data.type == LTTNG_CONSUMER32_UST
1801 || consumer_data.type == LTTNG_CONSUMER64_UST)) {
1802 DBG("Attempting to flush and consume the UST buffers");
1803 lttng_ustconsumer_on_stream_hangup(stream);
1804
1805 /* We just flushed the stream now read it. */
1806 len = ctx->on_buffer_ready(stream, ctx);
1807 /* It's ok to have an unavailable sub-buffer */
1808 if (len < 0 && len != -EAGAIN) {
d09e1200 1809 rcu_read_unlock();
fb3a43a9
DG
1810 goto end;
1811 }
1812 }
1813
1814 /* Removing it from hash table, poll set and free memory */
1815 lttng_ht_del(metadata_ht, &iter);
d09e1200 1816
fb3a43a9
DG
1817 lttng_poll_del(&events, stream->wait_fd);
1818 consumer_del_metadata_stream(stream);
1819 }
d09e1200 1820 rcu_read_unlock();
fb3a43a9
DG
1821 }
1822 }
1823
1824error:
1825end:
1826 DBG("Metadata poll thread exiting");
1827 lttng_poll_clean(&events);
1828
1829 if (metadata_ht) {
1830 destroy_stream_ht(metadata_ht);
1831 }
1832
1833 rcu_unregister_thread();
1834 return NULL;
1835}
1836
3bd1e081 1837/*
e4421fec 1838 * This thread polls the fds in the set to consume the data and write
3bd1e081
MD
1839 * it to tracefile if necessary.
1840 */
1841void *lttng_consumer_thread_poll_fds(void *data)
1842{
1843 int num_rdy, num_hup, high_prio, ret, i;
1844 struct pollfd *pollfd = NULL;
1845 /* local view of the streams */
1846 struct lttng_consumer_stream **local_stream = NULL;
1847 /* local view of consumer_data.fds_count */
1848 int nb_fd = 0;
3bd1e081 1849 struct lttng_consumer_local_data *ctx = data;
00e2e675 1850 ssize_t len;
fb3a43a9
DG
1851 pthread_t metadata_thread;
1852 void *status;
3bd1e081 1853
e7b994a3
DG
1854 rcu_register_thread();
1855
fb3a43a9
DG
1856 /* Start metadata polling thread */
1857 ret = pthread_create(&metadata_thread, NULL,
1858 lttng_consumer_thread_poll_metadata, (void *) ctx);
1859 if (ret < 0) {
1860 PERROR("pthread_create metadata thread");
1861 goto end;
1862 }
1863
effcf122 1864 local_stream = zmalloc(sizeof(struct lttng_consumer_stream));
3bd1e081
MD
1865
1866 while (1) {
1867 high_prio = 0;
1868 num_hup = 0;
1869
1870 /*
e4421fec 1871 * the fds set has been updated, we need to update our
3bd1e081
MD
1872 * local array as well
1873 */
1874 pthread_mutex_lock(&consumer_data.lock);
1875 if (consumer_data.need_update) {
1876 if (pollfd != NULL) {
1877 free(pollfd);
1878 pollfd = NULL;
1879 }
1880 if (local_stream != NULL) {
1881 free(local_stream);
1882 local_stream = NULL;
1883 }
1884
1885 /* allocate for all fds + 1 for the consumer_poll_pipe */
effcf122 1886 pollfd = zmalloc((consumer_data.stream_count + 1) * sizeof(struct pollfd));
3bd1e081
MD
1887 if (pollfd == NULL) {
1888 perror("pollfd malloc");
1889 pthread_mutex_unlock(&consumer_data.lock);
1890 goto end;
1891 }
1892
1893 /* allocate for all fds + 1 for the consumer_poll_pipe */
effcf122 1894 local_stream = zmalloc((consumer_data.stream_count + 1) *
3bd1e081
MD
1895 sizeof(struct lttng_consumer_stream));
1896 if (local_stream == NULL) {
1897 perror("local_stream malloc");
1898 pthread_mutex_unlock(&consumer_data.lock);
1899 goto end;
1900 }
fb3a43a9 1901 ret = consumer_update_poll_array(ctx, &pollfd, local_stream);
3bd1e081
MD
1902 if (ret < 0) {
1903 ERR("Error in allocating pollfd or local_outfds");
f73fabfd 1904 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_POLL_ERROR);
3bd1e081
MD
1905 pthread_mutex_unlock(&consumer_data.lock);
1906 goto end;
1907 }
1908 nb_fd = ret;
1909 consumer_data.need_update = 0;
1910 }
1911 pthread_mutex_unlock(&consumer_data.lock);
1912
4078b776
MD
1913 /* No FDs and consumer_quit, consumer_cleanup the thread */
1914 if (nb_fd == 0 && consumer_quit == 1) {
1915 goto end;
1916 }
3bd1e081 1917 /* poll on the array of fds */
88f2b785 1918 restart:
3bd1e081
MD
1919 DBG("polling on %d fd", nb_fd + 1);
1920 num_rdy = poll(pollfd, nb_fd + 1, consumer_poll_timeout);
1921 DBG("poll num_rdy : %d", num_rdy);
1922 if (num_rdy == -1) {
88f2b785
MD
1923 /*
1924 * Restart interrupted system call.
1925 */
1926 if (errno == EINTR) {
1927 goto restart;
1928 }
3bd1e081 1929 perror("Poll error");
f73fabfd 1930 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_POLL_ERROR);
3bd1e081
MD
1931 goto end;
1932 } else if (num_rdy == 0) {
1933 DBG("Polling thread timed out");
1934 goto end;
1935 }
1936
3bd1e081 1937 /*
00e2e675
DG
1938 * If the consumer_poll_pipe triggered poll go directly to the
1939 * beginning of the loop to update the array. We want to prioritize
1940 * array update over low-priority reads.
3bd1e081 1941 */
509bb1cf 1942 if (pollfd[nb_fd].revents & (POLLIN | POLLPRI)) {
04fdd819
MD
1943 size_t pipe_readlen;
1944 char tmp;
1945
3bd1e081 1946 DBG("consumer_poll_pipe wake up");
04fdd819
MD
1947 /* Consume 1 byte of pipe data */
1948 do {
1949 pipe_readlen = read(ctx->consumer_poll_pipe[0], &tmp, 1);
1950 } while (pipe_readlen == -1 && errno == EINTR);
3bd1e081
MD
1951 continue;
1952 }
1953
1954 /* Take care of high priority channels first. */
1955 for (i = 0; i < nb_fd; i++) {
fb3a43a9 1956 if (pollfd[i].revents & POLLPRI) {
d41f73b7
MD
1957 DBG("Urgent read on fd %d", pollfd[i].fd);
1958 high_prio = 1;
4078b776 1959 len = ctx->on_buffer_ready(local_stream[i], ctx);
d41f73b7 1960 /* it's ok to have an unavailable sub-buffer */
4078b776
MD
1961 if (len < 0 && len != -EAGAIN) {
1962 goto end;
1963 } else if (len > 0) {
1964 local_stream[i]->data_read = 1;
d41f73b7 1965 }
3bd1e081
MD
1966 }
1967 }
1968
4078b776
MD
1969 /*
1970 * If we read high prio channel in this loop, try again
1971 * for more high prio data.
1972 */
1973 if (high_prio) {
3bd1e081
MD
1974 continue;
1975 }
1976
1977 /* Take care of low priority channels. */
4078b776
MD
1978 for (i = 0; i < nb_fd; i++) {
1979 if ((pollfd[i].revents & POLLIN) ||
1980 local_stream[i]->hangup_flush_done) {
4078b776
MD
1981 DBG("Normal read on fd %d", pollfd[i].fd);
1982 len = ctx->on_buffer_ready(local_stream[i], ctx);
1983 /* it's ok to have an unavailable sub-buffer */
1984 if (len < 0 && len != -EAGAIN) {
1985 goto end;
1986 } else if (len > 0) {
1987 local_stream[i]->data_read = 1;
1988 }
1989 }
1990 }
1991
1992 /* Handle hangup and errors */
1993 for (i = 0; i < nb_fd; i++) {
1994 if (!local_stream[i]->hangup_flush_done
1995 && (pollfd[i].revents & (POLLHUP | POLLERR | POLLNVAL))
1996 && (consumer_data.type == LTTNG_CONSUMER32_UST
1997 || consumer_data.type == LTTNG_CONSUMER64_UST)) {
1998 DBG("fd %d is hup|err|nval. Attempting flush and read.",
1999 pollfd[i].fd);
2000 lttng_ustconsumer_on_stream_hangup(local_stream[i]);
2001 /* Attempt read again, for the data we just flushed. */
2002 local_stream[i]->data_read = 1;
2003 }
2004 /*
2005 * If the poll flag is HUP/ERR/NVAL and we have
2006 * read no data in this pass, we can remove the
2007 * stream from its hash table.
2008 */
2009 if ((pollfd[i].revents & POLLHUP)) {
2010 DBG("Polling fd %d tells it has hung up.", pollfd[i].fd);
2011 if (!local_stream[i]->data_read) {
702b1ea4 2012 consumer_del_stream(local_stream[i]);
4078b776
MD
2013 num_hup++;
2014 }
2015 } else if (pollfd[i].revents & POLLERR) {
2016 ERR("Error returned in polling fd %d.", pollfd[i].fd);
2017 if (!local_stream[i]->data_read) {
702b1ea4 2018 consumer_del_stream(local_stream[i]);
4078b776
MD
2019 num_hup++;
2020 }
2021 } else if (pollfd[i].revents & POLLNVAL) {
2022 ERR("Polling fd %d tells fd is not open.", pollfd[i].fd);
2023 if (!local_stream[i]->data_read) {
702b1ea4 2024 consumer_del_stream(local_stream[i]);
4078b776 2025 num_hup++;
3bd1e081
MD
2026 }
2027 }
4078b776 2028 local_stream[i]->data_read = 0;
3bd1e081
MD
2029 }
2030 }
2031end:
2032 DBG("polling thread exiting");
2033 if (pollfd != NULL) {
2034 free(pollfd);
2035 pollfd = NULL;
2036 }
2037 if (local_stream != NULL) {
2038 free(local_stream);
2039 local_stream = NULL;
2040 }
fb3a43a9
DG
2041
2042 /*
2043 * Close the write side of the pipe so epoll_wait() in
2044 * lttng_consumer_thread_poll_metadata can catch it. The thread is
2045 * monitoring the read side of the pipe. If we close them both, epoll_wait
2046 * strangely does not return and could create a endless wait period if the
2047 * pipe is the only tracked fd in the poll set. The thread will take care
2048 * of closing the read side.
2049 */
2050 close(ctx->consumer_metadata_pipe[1]);
2051 if (ret) {
2052 ret = pthread_join(metadata_thread, &status);
2053 if (ret < 0) {
2054 PERROR("pthread_join metadata thread");
2055 }
2056 }
2057
e7b994a3 2058 rcu_unregister_thread();
3bd1e081
MD
2059 return NULL;
2060}
2061
2062/*
2063 * This thread listens on the consumerd socket and receives the file
2064 * descriptors from the session daemon.
2065 */
2066void *lttng_consumer_thread_receive_fds(void *data)
2067{
2068 int sock, client_socket, ret;
2069 /*
2070 * structure to poll for incoming data on communication socket avoids
2071 * making blocking sockets.
2072 */
2073 struct pollfd consumer_sockpoll[2];
2074 struct lttng_consumer_local_data *ctx = data;
2075
e7b994a3
DG
2076 rcu_register_thread();
2077
3bd1e081
MD
2078 DBG("Creating command socket %s", ctx->consumer_command_sock_path);
2079 unlink(ctx->consumer_command_sock_path);
2080 client_socket = lttcomm_create_unix_sock(ctx->consumer_command_sock_path);
2081 if (client_socket < 0) {
2082 ERR("Cannot create command socket");
2083 goto end;
2084 }
2085
2086 ret = lttcomm_listen_unix_sock(client_socket);
2087 if (ret < 0) {
2088 goto end;
2089 }
2090
32258573 2091 DBG("Sending ready command to lttng-sessiond");
f73fabfd 2092 ret = lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_COMMAND_SOCK_READY);
3bd1e081
MD
2093 /* return < 0 on error, but == 0 is not fatal */
2094 if (ret < 0) {
32258573 2095 ERR("Error sending ready command to lttng-sessiond");
3bd1e081
MD
2096 goto end;
2097 }
2098
2099 ret = fcntl(client_socket, F_SETFL, O_NONBLOCK);
2100 if (ret < 0) {
2101 perror("fcntl O_NONBLOCK");
2102 goto end;
2103 }
2104
2105 /* prepare the FDs to poll : to client socket and the should_quit pipe */
2106 consumer_sockpoll[0].fd = ctx->consumer_should_quit[0];
2107 consumer_sockpoll[0].events = POLLIN | POLLPRI;
2108 consumer_sockpoll[1].fd = client_socket;
2109 consumer_sockpoll[1].events = POLLIN | POLLPRI;
2110
2111 if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
2112 goto end;
2113 }
2114 DBG("Connection on client_socket");
2115
2116 /* Blocking call, waiting for transmission */
2117 sock = lttcomm_accept_unix_sock(client_socket);
2118 if (sock <= 0) {
2119 WARN("On accept");
2120 goto end;
2121 }
2122 ret = fcntl(sock, F_SETFL, O_NONBLOCK);
2123 if (ret < 0) {
2124 perror("fcntl O_NONBLOCK");
2125 goto end;
2126 }
2127
2128 /* update the polling structure to poll on the established socket */
2129 consumer_sockpoll[1].fd = sock;
2130 consumer_sockpoll[1].events = POLLIN | POLLPRI;
2131
2132 while (1) {
2133 if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
2134 goto end;
2135 }
2136 DBG("Incoming command on sock");
2137 ret = lttng_consumer_recv_cmd(ctx, sock, consumer_sockpoll);
2138 if (ret == -ENOENT) {
2139 DBG("Received STOP command");
2140 goto end;
2141 }
4cbc1a04
DG
2142 if (ret <= 0) {
2143 /*
2144 * This could simply be a session daemon quitting. Don't output
2145 * ERR() here.
2146 */
2147 DBG("Communication interrupted on command socket");
3bd1e081
MD
2148 goto end;
2149 }
2150 if (consumer_quit) {
2151 DBG("consumer_thread_receive_fds received quit from signal");
2152 goto end;
2153 }
2154 DBG("received fds on sock");
2155 }
2156end:
2157 DBG("consumer_thread_receive_fds exiting");
2158
2159 /*
2160 * when all fds have hung up, the polling thread
2161 * can exit cleanly
2162 */
2163 consumer_quit = 1;
2164
2165 /*
2166 * 2s of grace period, if no polling events occur during
2167 * this period, the polling thread will exit even if there
2168 * are still open FDs (should not happen, but safety mechanism).
2169 */
2170 consumer_poll_timeout = LTTNG_CONSUMER_POLL_TIMEOUT;
2171
04fdd819
MD
2172 /*
2173 * Wake-up the other end by writing a null byte in the pipe
2174 * (non-blocking). Important note: Because writing into the
2175 * pipe is non-blocking (and therefore we allow dropping wakeup
2176 * data, as long as there is wakeup data present in the pipe
2177 * buffer to wake up the other end), the other end should
2178 * perform the following sequence for waiting:
2179 * 1) empty the pipe (reads).
2180 * 2) perform update operation.
2181 * 3) wait on the pipe (poll).
2182 */
2183 do {
2184 ret = write(ctx->consumer_poll_pipe[1], "", 1);
6f94560a 2185 } while (ret < 0 && errno == EINTR);
e7b994a3 2186 rcu_unregister_thread();
3bd1e081
MD
2187 return NULL;
2188}
d41f73b7 2189
4078b776 2190ssize_t lttng_consumer_read_subbuffer(struct lttng_consumer_stream *stream,
d41f73b7
MD
2191 struct lttng_consumer_local_data *ctx)
2192{
2193 switch (consumer_data.type) {
2194 case LTTNG_CONSUMER_KERNEL:
2195 return lttng_kconsumer_read_subbuffer(stream, ctx);
7753dea8
MD
2196 case LTTNG_CONSUMER32_UST:
2197 case LTTNG_CONSUMER64_UST:
d41f73b7
MD
2198 return lttng_ustconsumer_read_subbuffer(stream, ctx);
2199 default:
2200 ERR("Unknown consumer_data type");
2201 assert(0);
2202 return -ENOSYS;
2203 }
2204}
2205
2206int lttng_consumer_on_recv_stream(struct lttng_consumer_stream *stream)
2207{
2208 switch (consumer_data.type) {
2209 case LTTNG_CONSUMER_KERNEL:
2210 return lttng_kconsumer_on_recv_stream(stream);
7753dea8
MD
2211 case LTTNG_CONSUMER32_UST:
2212 case LTTNG_CONSUMER64_UST:
d41f73b7
MD
2213 return lttng_ustconsumer_on_recv_stream(stream);
2214 default:
2215 ERR("Unknown consumer_data type");
2216 assert(0);
2217 return -ENOSYS;
2218 }
2219}
e4421fec
DG
2220
2221/*
2222 * Allocate and set consumer data hash tables.
2223 */
2224void lttng_consumer_init(void)
2225{
2226 consumer_data.stream_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
2227 consumer_data.channel_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
00e2e675 2228 consumer_data.relayd_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
e4421fec 2229}
7735ef9e
DG
2230
2231/*
2232 * Process the ADD_RELAYD command receive by a consumer.
2233 *
2234 * This will create a relayd socket pair and add it to the relayd hash table.
2235 * The caller MUST acquire a RCU read side lock before calling it.
2236 */
2237int consumer_add_relayd_socket(int net_seq_idx, int sock_type,
2238 struct lttng_consumer_local_data *ctx, int sock,
2239 struct pollfd *consumer_sockpoll, struct lttcomm_sock *relayd_sock)
2240{
2241 int fd, ret = -1;
2242 struct consumer_relayd_sock_pair *relayd;
2243
2244 DBG("Consumer adding relayd socket (idx: %d)", net_seq_idx);
2245
2246 /* Get relayd reference if exists. */
2247 relayd = consumer_find_relayd(net_seq_idx);
2248 if (relayd == NULL) {
2249 /* Not found. Allocate one. */
2250 relayd = consumer_allocate_relayd_sock_pair(net_seq_idx);
2251 if (relayd == NULL) {
f73fabfd 2252 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
7735ef9e
DG
2253 goto error;
2254 }
2255 }
2256
2257 /* Poll on consumer socket. */
2258 if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
2259 ret = -EINTR;
2260 goto error;
2261 }
2262
2263 /* Get relayd socket from session daemon */
2264 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
2265 if (ret != sizeof(fd)) {
f73fabfd 2266 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
7735ef9e
DG
2267 ret = -1;
2268 goto error;
2269 }
2270
2271 /* Copy socket information and received FD */
2272 switch (sock_type) {
2273 case LTTNG_STREAM_CONTROL:
2274 /* Copy received lttcomm socket */
2275 lttcomm_copy_sock(&relayd->control_sock, relayd_sock);
2276 ret = lttcomm_create_sock(&relayd->control_sock);
2277 if (ret < 0) {
2278 goto error;
2279 }
2280
2281 /* Close the created socket fd which is useless */
2282 close(relayd->control_sock.fd);
2283
2284 /* Assign new file descriptor */
2285 relayd->control_sock.fd = fd;
2286 break;
2287 case LTTNG_STREAM_DATA:
2288 /* Copy received lttcomm socket */
2289 lttcomm_copy_sock(&relayd->data_sock, relayd_sock);
2290 ret = lttcomm_create_sock(&relayd->data_sock);
2291 if (ret < 0) {
2292 goto error;
2293 }
2294
2295 /* Close the created socket fd which is useless */
2296 close(relayd->data_sock.fd);
2297
2298 /* Assign new file descriptor */
2299 relayd->data_sock.fd = fd;
2300 break;
2301 default:
2302 ERR("Unknown relayd socket type (%d)", sock_type);
2303 goto error;
2304 }
2305
2306 DBG("Consumer %s socket created successfully with net idx %d (fd: %d)",
2307 sock_type == LTTNG_STREAM_CONTROL ? "control" : "data",
2308 relayd->net_seq_idx, fd);
2309
2310 /*
2311 * Add relayd socket pair to consumer data hashtable. If object already
2312 * exists or on error, the function gracefully returns.
2313 */
d09e1200 2314 add_relayd(relayd);
7735ef9e
DG
2315
2316 /* All good! */
2317 ret = 0;
2318
2319error:
2320 return ret;
2321}
This page took 0.230333 seconds and 4 git commands to generate.