Fix: Add missing call rcu and read side lock
[lttng-tools.git] / src / common / consumer.c
CommitLineData
3bd1e081
MD
1/*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
00e2e675 4 * 2012 - David Goulet <dgoulet@efficios.com>
3bd1e081 5 *
d14d33bf
AM
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2 only,
8 * as published by the Free Software Foundation.
3bd1e081 9 *
d14d33bf
AM
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
3bd1e081 14 *
d14d33bf
AM
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
3bd1e081
MD
18 */
19
20#define _GNU_SOURCE
21#include <assert.h>
3bd1e081
MD
22#include <poll.h>
23#include <pthread.h>
24#include <stdlib.h>
25#include <string.h>
26#include <sys/mman.h>
27#include <sys/socket.h>
28#include <sys/types.h>
29#include <unistd.h>
77c7c900 30#include <inttypes.h>
3bd1e081 31
990570ed 32#include <common/common.h>
fb3a43a9
DG
33#include <common/utils.h>
34#include <common/compat/poll.h>
10a8a223 35#include <common/kernel-ctl/kernel-ctl.h>
00e2e675 36#include <common/sessiond-comm/relayd.h>
10a8a223
DG
37#include <common/sessiond-comm/sessiond-comm.h>
38#include <common/kernel-consumer/kernel-consumer.h>
00e2e675 39#include <common/relayd/relayd.h>
10a8a223
DG
40#include <common/ust-consumer/ust-consumer.h>
41
42#include "consumer.h"
3bd1e081
MD
43
44struct lttng_consumer_global_data consumer_data = {
3bd1e081
MD
45 .stream_count = 0,
46 .need_update = 1,
47 .type = LTTNG_CONSUMER_UNKNOWN,
48};
49
50/* timeout parameter, to control the polling thread grace period. */
51int consumer_poll_timeout = -1;
52
53/*
54 * Flag to inform the polling thread to quit when all fd hung up. Updated by
55 * the consumer_thread_receive_fds when it notices that all fds has hung up.
56 * Also updated by the signal handler (consumer_should_exit()). Read by the
57 * polling threads.
58 */
59volatile int consumer_quit = 0;
60
61/*
62 * Find a stream. The consumer_data.lock must be locked during this
63 * call.
64 */
65static struct lttng_consumer_stream *consumer_find_stream(int key)
66{
e4421fec
DG
67 struct lttng_ht_iter iter;
68 struct lttng_ht_node_ulong *node;
69 struct lttng_consumer_stream *stream = NULL;
3bd1e081 70
7ad0a0cb 71 /* Negative keys are lookup failures */
7a57cf92 72 if (key < 0) {
7ad0a0cb 73 return NULL;
7a57cf92 74 }
e4421fec 75
6065ceec
DG
76 rcu_read_lock();
77
e4421fec
DG
78 lttng_ht_lookup(consumer_data.stream_ht, (void *)((unsigned long) key),
79 &iter);
80 node = lttng_ht_iter_get_node_ulong(&iter);
81 if (node != NULL) {
82 stream = caa_container_of(node, struct lttng_consumer_stream, node);
3bd1e081 83 }
e4421fec 84
6065ceec
DG
85 rcu_read_unlock();
86
e4421fec 87 return stream;
3bd1e081
MD
88}
89
7ad0a0cb
MD
90static void consumer_steal_stream_key(int key)
91{
92 struct lttng_consumer_stream *stream;
93
04253271 94 rcu_read_lock();
7ad0a0cb 95 stream = consumer_find_stream(key);
04253271 96 if (stream) {
7ad0a0cb 97 stream->key = -1;
04253271
MD
98 /*
99 * We don't want the lookup to match, but we still need
100 * to iterate on this stream when iterating over the hash table. Just
101 * change the node key.
102 */
103 stream->node.key = -1;
104 }
105 rcu_read_unlock();
7ad0a0cb
MD
106}
107
3bd1e081
MD
108static struct lttng_consumer_channel *consumer_find_channel(int key)
109{
e4421fec
DG
110 struct lttng_ht_iter iter;
111 struct lttng_ht_node_ulong *node;
112 struct lttng_consumer_channel *channel = NULL;
3bd1e081 113
7ad0a0cb 114 /* Negative keys are lookup failures */
7a57cf92 115 if (key < 0) {
7ad0a0cb 116 return NULL;
7a57cf92 117 }
e4421fec 118
6065ceec
DG
119 rcu_read_lock();
120
e4421fec
DG
121 lttng_ht_lookup(consumer_data.channel_ht, (void *)((unsigned long) key),
122 &iter);
123 node = lttng_ht_iter_get_node_ulong(&iter);
124 if (node != NULL) {
125 channel = caa_container_of(node, struct lttng_consumer_channel, node);
3bd1e081 126 }
e4421fec 127
6065ceec
DG
128 rcu_read_unlock();
129
e4421fec 130 return channel;
3bd1e081
MD
131}
132
7ad0a0cb
MD
133static void consumer_steal_channel_key(int key)
134{
135 struct lttng_consumer_channel *channel;
136
04253271 137 rcu_read_lock();
7ad0a0cb 138 channel = consumer_find_channel(key);
04253271 139 if (channel) {
7ad0a0cb 140 channel->key = -1;
04253271
MD
141 /*
142 * We don't want the lookup to match, but we still need
143 * to iterate on this channel when iterating over the hash table. Just
144 * change the node key.
145 */
146 channel->node.key = -1;
147 }
148 rcu_read_unlock();
7ad0a0cb
MD
149}
150
702b1ea4
MD
151static
152void consumer_free_stream(struct rcu_head *head)
153{
154 struct lttng_ht_node_ulong *node =
155 caa_container_of(head, struct lttng_ht_node_ulong, head);
156 struct lttng_consumer_stream *stream =
157 caa_container_of(node, struct lttng_consumer_stream, node);
158
159 free(stream);
160}
161
00e2e675
DG
162/*
163 * RCU protected relayd socket pair free.
164 */
165static void consumer_rcu_free_relayd(struct rcu_head *head)
166{
167 struct lttng_ht_node_ulong *node =
168 caa_container_of(head, struct lttng_ht_node_ulong, head);
169 struct consumer_relayd_sock_pair *relayd =
170 caa_container_of(node, struct consumer_relayd_sock_pair, node);
171
172 free(relayd);
173}
174
175/*
176 * Destroy and free relayd socket pair object.
177 *
178 * This function MUST be called with the consumer_data lock acquired.
179 */
d09e1200 180static void destroy_relayd(struct consumer_relayd_sock_pair *relayd)
00e2e675
DG
181{
182 int ret;
183 struct lttng_ht_iter iter;
184
173af62f
DG
185 if (relayd == NULL) {
186 return;
187 }
188
00e2e675
DG
189 DBG("Consumer destroy and close relayd socket pair");
190
191 iter.iter.node = &relayd->node.node;
192 ret = lttng_ht_del(consumer_data.relayd_ht, &iter);
173af62f
DG
193 if (ret != 0) {
194 /* We assume the relayd was already destroyed */
195 return;
196 }
00e2e675
DG
197
198 /* Close all sockets */
199 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
200 (void) relayd_close(&relayd->control_sock);
201 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
202 (void) relayd_close(&relayd->data_sock);
203
204 /* RCU free() call */
205 call_rcu(&relayd->node.head, consumer_rcu_free_relayd);
206}
207
a6ba4fe1
DG
208/*
209 * Flag a relayd socket pair for destruction. Destroy it if the refcount
210 * reaches zero.
211 *
212 * RCU read side lock MUST be aquired before calling this function.
213 */
214void consumer_flag_relayd_for_destroy(struct consumer_relayd_sock_pair *relayd)
215{
216 assert(relayd);
217
218 /* Set destroy flag for this object */
219 uatomic_set(&relayd->destroy_flag, 1);
220
221 /* Destroy the relayd if refcount is 0 */
222 if (uatomic_read(&relayd->refcount) == 0) {
d09e1200 223 destroy_relayd(relayd);
a6ba4fe1
DG
224 }
225}
226
3bd1e081
MD
227/*
228 * Remove a stream from the global list protected by a mutex. This
229 * function is also responsible for freeing its data structures.
230 */
231void consumer_del_stream(struct lttng_consumer_stream *stream)
232{
233 int ret;
e4421fec 234 struct lttng_ht_iter iter;
3bd1e081 235 struct lttng_consumer_channel *free_chan = NULL;
00e2e675
DG
236 struct consumer_relayd_sock_pair *relayd;
237
238 assert(stream);
3bd1e081
MD
239
240 pthread_mutex_lock(&consumer_data.lock);
241
242 switch (consumer_data.type) {
243 case LTTNG_CONSUMER_KERNEL:
244 if (stream->mmap_base != NULL) {
245 ret = munmap(stream->mmap_base, stream->mmap_len);
246 if (ret != 0) {
7a57cf92 247 PERROR("munmap");
3bd1e081
MD
248 }
249 }
250 break;
7753dea8
MD
251 case LTTNG_CONSUMER32_UST:
252 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
253 lttng_ustconsumer_del_stream(stream);
254 break;
255 default:
256 ERR("Unknown consumer_data type");
257 assert(0);
258 goto end;
259 }
260
6065ceec 261 rcu_read_lock();
04253271
MD
262 iter.iter.node = &stream->node.node;
263 ret = lttng_ht_del(consumer_data.stream_ht, &iter);
264 assert(!ret);
e4421fec 265
6065ceec
DG
266 rcu_read_unlock();
267
3bd1e081
MD
268 if (consumer_data.stream_count <= 0) {
269 goto end;
270 }
271 consumer_data.stream_count--;
272 if (!stream) {
273 goto end;
274 }
275 if (stream->out_fd >= 0) {
4c462e79
MD
276 ret = close(stream->out_fd);
277 if (ret) {
278 PERROR("close");
279 }
3bd1e081 280 }
b5c5fc29 281 if (stream->wait_fd >= 0 && !stream->wait_fd_is_copy) {
4c462e79
MD
282 ret = close(stream->wait_fd);
283 if (ret) {
284 PERROR("close");
285 }
3bd1e081 286 }
2c1dd183 287 if (stream->shm_fd >= 0 && stream->wait_fd != stream->shm_fd) {
4c462e79
MD
288 ret = close(stream->shm_fd);
289 if (ret) {
290 PERROR("close");
291 }
3bd1e081 292 }
00e2e675
DG
293
294 /* Check and cleanup relayd */
b0b335c8 295 rcu_read_lock();
00e2e675
DG
296 relayd = consumer_find_relayd(stream->net_seq_idx);
297 if (relayd != NULL) {
b0b335c8
MD
298 uatomic_dec(&relayd->refcount);
299 assert(uatomic_read(&relayd->refcount) >= 0);
173af62f 300
3f8e211f
DG
301 /* Closing streams requires to lock the control socket. */
302 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
173af62f
DG
303 ret = relayd_send_close_stream(&relayd->control_sock,
304 stream->relayd_stream_id,
305 stream->next_net_seq_num - 1);
3f8e211f 306 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
173af62f 307 if (ret < 0) {
a4b92340
DG
308 DBG("Unable to close stream on the relayd. Continuing");
309 /*
310 * Continue here. There is nothing we can do for the relayd.
311 * Chances are that the relayd has closed the socket so we just
312 * continue cleaning up.
313 */
173af62f
DG
314 }
315
316 /* Both conditions are met, we destroy the relayd. */
317 if (uatomic_read(&relayd->refcount) == 0 &&
318 uatomic_read(&relayd->destroy_flag)) {
d09e1200 319 destroy_relayd(relayd);
00e2e675 320 }
00e2e675 321 }
b0b335c8 322 rcu_read_unlock();
00e2e675 323
c30aaa51
MD
324 uatomic_dec(&stream->chan->refcount);
325 if (!uatomic_read(&stream->chan->refcount)
326 && !uatomic_read(&stream->chan->nb_init_streams)) {
3bd1e081 327 free_chan = stream->chan;
00e2e675
DG
328 }
329
702b1ea4 330 call_rcu(&stream->node.head, consumer_free_stream);
3bd1e081
MD
331end:
332 consumer_data.need_update = 1;
333 pthread_mutex_unlock(&consumer_data.lock);
334
c30aaa51 335 if (free_chan) {
3bd1e081 336 consumer_del_channel(free_chan);
c30aaa51 337 }
3bd1e081
MD
338}
339
340struct lttng_consumer_stream *consumer_allocate_stream(
341 int channel_key, int stream_key,
342 int shm_fd, int wait_fd,
343 enum lttng_consumer_stream_state state,
344 uint64_t mmap_len,
345 enum lttng_event_output output,
6df2e2c9
MD
346 const char *path_name,
347 uid_t uid,
00e2e675
DG
348 gid_t gid,
349 int net_index,
c80048c6
MD
350 int metadata_flag,
351 int *alloc_ret)
3bd1e081
MD
352{
353 struct lttng_consumer_stream *stream;
354 int ret;
355
effcf122 356 stream = zmalloc(sizeof(*stream));
3bd1e081 357 if (stream == NULL) {
7a57cf92 358 PERROR("malloc struct lttng_consumer_stream");
c80048c6 359 *alloc_ret = -ENOMEM;
7a57cf92 360 goto end;
3bd1e081 361 }
7a57cf92
DG
362
363 /*
364 * Get stream's channel reference. Needed when adding the stream to the
365 * global hash table.
366 */
3bd1e081
MD
367 stream->chan = consumer_find_channel(channel_key);
368 if (!stream->chan) {
c80048c6 369 *alloc_ret = -ENOENT;
7a57cf92 370 ERR("Unable to find channel for stream %d", stream_key);
c80048c6 371 goto error;
3bd1e081
MD
372 }
373 stream->chan->refcount++;
374 stream->key = stream_key;
375 stream->shm_fd = shm_fd;
376 stream->wait_fd = wait_fd;
377 stream->out_fd = -1;
378 stream->out_fd_offset = 0;
379 stream->state = state;
380 stream->mmap_len = mmap_len;
381 stream->mmap_base = NULL;
382 stream->output = output;
6df2e2c9
MD
383 stream->uid = uid;
384 stream->gid = gid;
00e2e675
DG
385 stream->net_seq_idx = net_index;
386 stream->metadata_flag = metadata_flag;
387 strncpy(stream->path_name, path_name, sizeof(stream->path_name));
388 stream->path_name[sizeof(stream->path_name) - 1] = '\0';
e4421fec 389 lttng_ht_node_init_ulong(&stream->node, stream->key);
00e2e675 390 lttng_ht_node_init_ulong(&stream->waitfd_node, stream->wait_fd);
3bd1e081
MD
391
392 switch (consumer_data.type) {
393 case LTTNG_CONSUMER_KERNEL:
394 break;
7753dea8
MD
395 case LTTNG_CONSUMER32_UST:
396 case LTTNG_CONSUMER64_UST:
5af2f756 397 stream->cpu = stream->chan->cpucount++;
3bd1e081
MD
398 ret = lttng_ustconsumer_allocate_stream(stream);
399 if (ret) {
c80048c6
MD
400 *alloc_ret = -EINVAL;
401 goto error;
3bd1e081
MD
402 }
403 break;
404 default:
405 ERR("Unknown consumer_data type");
c80048c6
MD
406 *alloc_ret = -EINVAL;
407 goto error;
3bd1e081 408 }
c30aaa51
MD
409
410 /*
411 * When nb_init_streams reaches 0, we don't need to trigger any action in
412 * terms of destroying the associated channel, because the action that
413 * causes the count to become 0 also causes a stream to be added. The
414 * channel deletion will thus be triggered by the following removal of this
415 * stream.
416 */
417 if (uatomic_read(&stream->chan->nb_init_streams) > 0) {
418 uatomic_dec(&stream->chan->nb_init_streams);
419 }
420
421 DBG3("Allocated stream %s (key %d, shm_fd %d, wait_fd %d, mmap_len %llu,"
422 " out_fd %d, net_seq_idx %d)", stream->path_name, stream->key,
423 stream->shm_fd, stream->wait_fd,
424 (unsigned long long) stream->mmap_len, stream->out_fd,
00e2e675 425 stream->net_seq_idx);
3bd1e081 426 return stream;
c80048c6
MD
427
428error:
429 free(stream);
7a57cf92 430end:
c80048c6 431 return NULL;
3bd1e081
MD
432}
433
434/*
435 * Add a stream to the global list protected by a mutex.
436 */
437int consumer_add_stream(struct lttng_consumer_stream *stream)
438{
439 int ret = 0;
c77fc10a
DG
440 struct lttng_ht_node_ulong *node;
441 struct lttng_ht_iter iter;
00e2e675 442 struct consumer_relayd_sock_pair *relayd;
3bd1e081
MD
443
444 pthread_mutex_lock(&consumer_data.lock);
7ad0a0cb
MD
445 /* Steal stream identifier, for UST */
446 consumer_steal_stream_key(stream->key);
c77fc10a 447
b0b335c8 448 rcu_read_lock();
c77fc10a
DG
449 lttng_ht_lookup(consumer_data.stream_ht,
450 (void *)((unsigned long) stream->key), &iter);
451 node = lttng_ht_iter_get_node_ulong(&iter);
452 if (node != NULL) {
453 rcu_read_unlock();
454 /* Stream already exist. Ignore the insertion */
455 goto end;
456 }
457
04253271 458 lttng_ht_add_unique_ulong(consumer_data.stream_ht, &stream->node);
00e2e675
DG
459
460 /* Check and cleanup relayd */
461 relayd = consumer_find_relayd(stream->net_seq_idx);
462 if (relayd != NULL) {
b0b335c8 463 uatomic_inc(&relayd->refcount);
00e2e675 464 }
b0b335c8 465 rcu_read_unlock();
00e2e675
DG
466
467 /* Update consumer data */
3bd1e081
MD
468 consumer_data.stream_count++;
469 consumer_data.need_update = 1;
470
3bd1e081
MD
471end:
472 pthread_mutex_unlock(&consumer_data.lock);
702b1ea4 473
3bd1e081
MD
474 return ret;
475}
476
00e2e675 477/*
3f8e211f
DG
478 * Add relayd socket to global consumer data hashtable. RCU read side lock MUST
479 * be acquired before calling this.
00e2e675 480 */
d09e1200 481static int add_relayd(struct consumer_relayd_sock_pair *relayd)
00e2e675
DG
482{
483 int ret = 0;
484 struct lttng_ht_node_ulong *node;
485 struct lttng_ht_iter iter;
486
487 if (relayd == NULL) {
488 ret = -1;
489 goto end;
490 }
491
00e2e675
DG
492 lttng_ht_lookup(consumer_data.relayd_ht,
493 (void *)((unsigned long) relayd->net_seq_idx), &iter);
494 node = lttng_ht_iter_get_node_ulong(&iter);
495 if (node != NULL) {
00e2e675
DG
496 /* Relayd already exist. Ignore the insertion */
497 goto end;
498 }
499 lttng_ht_add_unique_ulong(consumer_data.relayd_ht, &relayd->node);
500
00e2e675
DG
501end:
502 return ret;
503}
504
505/*
506 * Allocate and return a consumer relayd socket.
507 */
508struct consumer_relayd_sock_pair *consumer_allocate_relayd_sock_pair(
509 int net_seq_idx)
510{
511 struct consumer_relayd_sock_pair *obj = NULL;
512
513 /* Negative net sequence index is a failure */
514 if (net_seq_idx < 0) {
515 goto error;
516 }
517
518 obj = zmalloc(sizeof(struct consumer_relayd_sock_pair));
519 if (obj == NULL) {
520 PERROR("zmalloc relayd sock");
521 goto error;
522 }
523
524 obj->net_seq_idx = net_seq_idx;
525 obj->refcount = 0;
173af62f 526 obj->destroy_flag = 0;
00e2e675
DG
527 lttng_ht_node_init_ulong(&obj->node, obj->net_seq_idx);
528 pthread_mutex_init(&obj->ctrl_sock_mutex, NULL);
529
530error:
531 return obj;
532}
533
534/*
535 * Find a relayd socket pair in the global consumer data.
536 *
537 * Return the object if found else NULL.
b0b335c8
MD
538 * RCU read-side lock must be held across this call and while using the
539 * returned object.
00e2e675
DG
540 */
541struct consumer_relayd_sock_pair *consumer_find_relayd(int key)
542{
543 struct lttng_ht_iter iter;
544 struct lttng_ht_node_ulong *node;
545 struct consumer_relayd_sock_pair *relayd = NULL;
546
547 /* Negative keys are lookup failures */
548 if (key < 0) {
549 goto error;
550 }
551
00e2e675
DG
552 lttng_ht_lookup(consumer_data.relayd_ht, (void *)((unsigned long) key),
553 &iter);
554 node = lttng_ht_iter_get_node_ulong(&iter);
555 if (node != NULL) {
556 relayd = caa_container_of(node, struct consumer_relayd_sock_pair, node);
557 }
558
00e2e675
DG
559error:
560 return relayd;
561}
562
563/*
564 * Handle stream for relayd transmission if the stream applies for network
565 * streaming where the net sequence index is set.
566 *
567 * Return destination file descriptor or negative value on error.
568 */
6197aea7 569static int write_relayd_stream_header(struct lttng_consumer_stream *stream,
1d4dfdef
DG
570 size_t data_size, unsigned long padding,
571 struct consumer_relayd_sock_pair *relayd)
00e2e675
DG
572{
573 int outfd = -1, ret;
00e2e675
DG
574 struct lttcomm_relayd_data_hdr data_hdr;
575
576 /* Safety net */
577 assert(stream);
6197aea7 578 assert(relayd);
00e2e675
DG
579
580 /* Reset data header */
581 memset(&data_hdr, 0, sizeof(data_hdr));
582
00e2e675
DG
583 if (stream->metadata_flag) {
584 /* Caller MUST acquire the relayd control socket lock */
585 ret = relayd_send_metadata(&relayd->control_sock, data_size);
586 if (ret < 0) {
587 goto error;
588 }
589
590 /* Metadata are always sent on the control socket. */
591 outfd = relayd->control_sock.fd;
592 } else {
593 /* Set header with stream information */
594 data_hdr.stream_id = htobe64(stream->relayd_stream_id);
595 data_hdr.data_size = htobe32(data_size);
1d4dfdef 596 data_hdr.padding_size = htobe32(padding);
173af62f 597 data_hdr.net_seq_num = htobe64(stream->next_net_seq_num++);
00e2e675
DG
598 /* Other fields are zeroed previously */
599
600 ret = relayd_send_data_hdr(&relayd->data_sock, &data_hdr,
601 sizeof(data_hdr));
602 if (ret < 0) {
603 goto error;
604 }
605
606 /* Set to go on data socket */
607 outfd = relayd->data_sock.fd;
608 }
609
610error:
611 return outfd;
612}
613
3bd1e081
MD
614/*
615 * Update a stream according to what we just received.
616 */
617void consumer_change_stream_state(int stream_key,
618 enum lttng_consumer_stream_state state)
619{
620 struct lttng_consumer_stream *stream;
621
622 pthread_mutex_lock(&consumer_data.lock);
623 stream = consumer_find_stream(stream_key);
624 if (stream) {
625 stream->state = state;
626 }
627 consumer_data.need_update = 1;
628 pthread_mutex_unlock(&consumer_data.lock);
629}
630
702b1ea4
MD
631static
632void consumer_free_channel(struct rcu_head *head)
633{
634 struct lttng_ht_node_ulong *node =
635 caa_container_of(head, struct lttng_ht_node_ulong, head);
636 struct lttng_consumer_channel *channel =
637 caa_container_of(node, struct lttng_consumer_channel, node);
638
639 free(channel);
640}
641
3bd1e081
MD
642/*
643 * Remove a channel from the global list protected by a mutex. This
644 * function is also responsible for freeing its data structures.
645 */
646void consumer_del_channel(struct lttng_consumer_channel *channel)
647{
648 int ret;
e4421fec 649 struct lttng_ht_iter iter;
3bd1e081
MD
650
651 pthread_mutex_lock(&consumer_data.lock);
652
653 switch (consumer_data.type) {
654 case LTTNG_CONSUMER_KERNEL:
655 break;
7753dea8
MD
656 case LTTNG_CONSUMER32_UST:
657 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
658 lttng_ustconsumer_del_channel(channel);
659 break;
660 default:
661 ERR("Unknown consumer_data type");
662 assert(0);
663 goto end;
664 }
665
6065ceec 666 rcu_read_lock();
04253271
MD
667 iter.iter.node = &channel->node.node;
668 ret = lttng_ht_del(consumer_data.channel_ht, &iter);
669 assert(!ret);
6065ceec
DG
670 rcu_read_unlock();
671
3bd1e081
MD
672 if (channel->mmap_base != NULL) {
673 ret = munmap(channel->mmap_base, channel->mmap_len);
674 if (ret != 0) {
7a57cf92 675 PERROR("munmap");
3bd1e081
MD
676 }
677 }
b5c5fc29 678 if (channel->wait_fd >= 0 && !channel->wait_fd_is_copy) {
4c462e79
MD
679 ret = close(channel->wait_fd);
680 if (ret) {
681 PERROR("close");
682 }
3bd1e081 683 }
2c1dd183 684 if (channel->shm_fd >= 0 && channel->wait_fd != channel->shm_fd) {
4c462e79
MD
685 ret = close(channel->shm_fd);
686 if (ret) {
687 PERROR("close");
688 }
3bd1e081 689 }
702b1ea4
MD
690
691 call_rcu(&channel->node.head, consumer_free_channel);
3bd1e081
MD
692end:
693 pthread_mutex_unlock(&consumer_data.lock);
694}
695
696struct lttng_consumer_channel *consumer_allocate_channel(
697 int channel_key,
698 int shm_fd, int wait_fd,
699 uint64_t mmap_len,
c30aaa51
MD
700 uint64_t max_sb_size,
701 unsigned int nb_init_streams)
3bd1e081
MD
702{
703 struct lttng_consumer_channel *channel;
704 int ret;
705
276b26d1 706 channel = zmalloc(sizeof(*channel));
3bd1e081 707 if (channel == NULL) {
7a57cf92 708 PERROR("malloc struct lttng_consumer_channel");
3bd1e081
MD
709 goto end;
710 }
711 channel->key = channel_key;
712 channel->shm_fd = shm_fd;
713 channel->wait_fd = wait_fd;
714 channel->mmap_len = mmap_len;
715 channel->max_sb_size = max_sb_size;
716 channel->refcount = 0;
c30aaa51 717 channel->nb_init_streams = nb_init_streams;
e4421fec 718 lttng_ht_node_init_ulong(&channel->node, channel->key);
3bd1e081
MD
719
720 switch (consumer_data.type) {
721 case LTTNG_CONSUMER_KERNEL:
722 channel->mmap_base = NULL;
723 channel->mmap_len = 0;
724 break;
7753dea8
MD
725 case LTTNG_CONSUMER32_UST:
726 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
727 ret = lttng_ustconsumer_allocate_channel(channel);
728 if (ret) {
729 free(channel);
730 return NULL;
731 }
732 break;
733 default:
734 ERR("Unknown consumer_data type");
735 assert(0);
736 goto end;
737 }
738 DBG("Allocated channel (key %d, shm_fd %d, wait_fd %d, mmap_len %llu, max_sb_size %llu)",
00e2e675 739 channel->key, channel->shm_fd, channel->wait_fd,
3bd1e081
MD
740 (unsigned long long) channel->mmap_len,
741 (unsigned long long) channel->max_sb_size);
742end:
743 return channel;
744}
745
746/*
747 * Add a channel to the global list protected by a mutex.
748 */
749int consumer_add_channel(struct lttng_consumer_channel *channel)
750{
c77fc10a
DG
751 struct lttng_ht_node_ulong *node;
752 struct lttng_ht_iter iter;
753
3bd1e081 754 pthread_mutex_lock(&consumer_data.lock);
7ad0a0cb
MD
755 /* Steal channel identifier, for UST */
756 consumer_steal_channel_key(channel->key);
6065ceec 757 rcu_read_lock();
c77fc10a
DG
758
759 lttng_ht_lookup(consumer_data.channel_ht,
760 (void *)((unsigned long) channel->key), &iter);
761 node = lttng_ht_iter_get_node_ulong(&iter);
762 if (node != NULL) {
763 /* Channel already exist. Ignore the insertion */
764 goto end;
765 }
766
04253271 767 lttng_ht_add_unique_ulong(consumer_data.channel_ht, &channel->node);
c77fc10a
DG
768
769end:
6065ceec 770 rcu_read_unlock();
3bd1e081 771 pthread_mutex_unlock(&consumer_data.lock);
702b1ea4 772
7ad0a0cb 773 return 0;
3bd1e081
MD
774}
775
776/*
777 * Allocate the pollfd structure and the local view of the out fds to avoid
778 * doing a lookup in the linked list and concurrency issues when writing is
779 * needed. Called with consumer_data.lock held.
780 *
781 * Returns the number of fds in the structures.
782 */
783int consumer_update_poll_array(
784 struct lttng_consumer_local_data *ctx, struct pollfd **pollfd,
fb3a43a9 785 struct lttng_consumer_stream **local_stream)
3bd1e081 786{
3bd1e081 787 int i = 0;
e4421fec
DG
788 struct lttng_ht_iter iter;
789 struct lttng_consumer_stream *stream;
3bd1e081
MD
790
791 DBG("Updating poll fd array");
481d6c57 792 rcu_read_lock();
e4421fec
DG
793 cds_lfht_for_each_entry(consumer_data.stream_ht->ht, &iter.iter, stream,
794 node.node) {
795 if (stream->state != LTTNG_CONSUMER_ACTIVE_STREAM) {
3bd1e081
MD
796 continue;
797 }
e4421fec
DG
798 DBG("Active FD %d", stream->wait_fd);
799 (*pollfd)[i].fd = stream->wait_fd;
3bd1e081 800 (*pollfd)[i].events = POLLIN | POLLPRI;
e4421fec 801 local_stream[i] = stream;
3bd1e081
MD
802 i++;
803 }
481d6c57 804 rcu_read_unlock();
3bd1e081
MD
805
806 /*
807 * Insert the consumer_poll_pipe at the end of the array and don't
808 * increment i so nb_fd is the number of real FD.
809 */
810 (*pollfd)[i].fd = ctx->consumer_poll_pipe[0];
509bb1cf 811 (*pollfd)[i].events = POLLIN | POLLPRI;
3bd1e081
MD
812 return i;
813}
814
/*
 * Block in poll() on the two entries of consumer_sockpoll (the should_quit
 * pipe and the command socket), restarting when interrupted by a signal.
 *
 * Returns -1 on poll error or when the first entry (should_quit) is
 * readable, meaning the caller should exit; returns 0 otherwise.
 */
int lttng_consumer_poll_socket(struct pollfd *consumer_sockpoll)
{
	int num_rdy;

	/* Restart interrupted system call. */
	do {
		num_rdy = poll(consumer_sockpoll, 2, -1);
	} while (num_rdy == -1 && errno == EINTR);

	if (num_rdy == -1) {
		PERROR("Poll error");
		return -1;
	}

	if (consumer_sockpoll[0].revents & (POLLIN | POLLPRI)) {
		DBG("consumer_should_quit wake up");
		return -1;
	}

	return 0;
}
844
845/*
846 * Set the error socket.
847 */
848void lttng_consumer_set_error_sock(
849 struct lttng_consumer_local_data *ctx, int sock)
850{
851 ctx->consumer_error_socket = sock;
852}
853
854/*
855 * Set the command socket path.
856 */
3bd1e081
MD
857void lttng_consumer_set_command_sock_path(
858 struct lttng_consumer_local_data *ctx, char *sock)
859{
860 ctx->consumer_command_sock_path = sock;
861}
862
863/*
864 * Send return code to the session daemon.
865 * If the socket is not defined, we return 0, it is not a fatal error
866 */
867int lttng_consumer_send_error(
868 struct lttng_consumer_local_data *ctx, int cmd)
869{
870 if (ctx->consumer_error_socket > 0) {
871 return lttcomm_send_unix_sock(ctx->consumer_error_socket, &cmd,
872 sizeof(enum lttcomm_sessiond_command));
873 }
874
875 return 0;
876}
877
878/*
879 * Close all the tracefiles and stream fds, should be called when all instances
880 * are destroyed.
881 */
882void lttng_consumer_cleanup(void)
883{
e4421fec 884 struct lttng_ht_iter iter;
6065ceec
DG
885 struct lttng_ht_node_ulong *node;
886
887 rcu_read_lock();
3bd1e081
MD
888
889 /*
6065ceec
DG
890 * close all outfd. Called when there are no more threads running (after
891 * joining on the threads), no need to protect list iteration with mutex.
3bd1e081 892 */
6065ceec
DG
893 cds_lfht_for_each_entry(consumer_data.stream_ht->ht, &iter.iter, node,
894 node) {
702b1ea4
MD
895 struct lttng_consumer_stream *stream =
896 caa_container_of(node, struct lttng_consumer_stream, node);
897 consumer_del_stream(stream);
3bd1e081 898 }
e4421fec 899
6065ceec
DG
900 cds_lfht_for_each_entry(consumer_data.channel_ht->ht, &iter.iter, node,
901 node) {
702b1ea4
MD
902 struct lttng_consumer_channel *channel =
903 caa_container_of(node, struct lttng_consumer_channel, node);
904 consumer_del_channel(channel);
3bd1e081 905 }
6065ceec
DG
906
907 rcu_read_unlock();
d6ce1df2
MD
908
909 lttng_ht_destroy(consumer_data.stream_ht);
910 lttng_ht_destroy(consumer_data.channel_ht);
3bd1e081
MD
911}
912
913/*
914 * Called from signal handler.
915 */
916void lttng_consumer_should_exit(struct lttng_consumer_local_data *ctx)
917{
918 int ret;
919 consumer_quit = 1;
6f94560a
MD
920 do {
921 ret = write(ctx->consumer_should_quit[1], "4", 1);
922 } while (ret < 0 && errno == EINTR);
3bd1e081 923 if (ret < 0) {
7a57cf92 924 PERROR("write consumer quit");
3bd1e081
MD
925 }
926}
927
00e2e675
DG
928void lttng_consumer_sync_trace_file(struct lttng_consumer_stream *stream,
929 off_t orig_offset)
3bd1e081
MD
930{
931 int outfd = stream->out_fd;
932
933 /*
934 * This does a blocking write-and-wait on any page that belongs to the
935 * subbuffer prior to the one we just wrote.
936 * Don't care about error values, as these are just hints and ways to
937 * limit the amount of page cache used.
938 */
939 if (orig_offset < stream->chan->max_sb_size) {
940 return;
941 }
b9182dd9 942 lttng_sync_file_range(outfd, orig_offset - stream->chan->max_sb_size,
3bd1e081
MD
943 stream->chan->max_sb_size,
944 SYNC_FILE_RANGE_WAIT_BEFORE
945 | SYNC_FILE_RANGE_WRITE
946 | SYNC_FILE_RANGE_WAIT_AFTER);
947 /*
948 * Give hints to the kernel about how we access the file:
949 * POSIX_FADV_DONTNEED : we won't re-access data in a near future after
950 * we write it.
951 *
952 * We need to call fadvise again after the file grows because the
953 * kernel does not seem to apply fadvise to non-existing parts of the
954 * file.
955 *
956 * Call fadvise _after_ having waited for the page writeback to
957 * complete because the dirty page writeback semantic is not well
958 * defined. So it can be expected to lead to lower throughput in
959 * streaming.
960 */
961 posix_fadvise(outfd, orig_offset - stream->chan->max_sb_size,
962 stream->chan->max_sb_size, POSIX_FADV_DONTNEED);
963}
964
965/*
966 * Initialise the necessary environnement :
967 * - create a new context
968 * - create the poll_pipe
969 * - create the should_quit pipe (for signal handler)
970 * - create the thread pipe (for splice)
971 *
972 * Takes a function pointer as argument, this function is called when data is
973 * available on a buffer. This function is responsible to do the
974 * kernctl_get_next_subbuf, read the data with mmap or splice depending on the
975 * buffer configuration and then kernctl_put_next_subbuf at the end.
976 *
977 * Returns a pointer to the new context or NULL on error.
978 */
979struct lttng_consumer_local_data *lttng_consumer_create(
980 enum lttng_consumer_type type,
4078b776 981 ssize_t (*buffer_ready)(struct lttng_consumer_stream *stream,
d41f73b7 982 struct lttng_consumer_local_data *ctx),
3bd1e081
MD
983 int (*recv_channel)(struct lttng_consumer_channel *channel),
984 int (*recv_stream)(struct lttng_consumer_stream *stream),
985 int (*update_stream)(int stream_key, uint32_t state))
986{
987 int ret, i;
988 struct lttng_consumer_local_data *ctx;
989
990 assert(consumer_data.type == LTTNG_CONSUMER_UNKNOWN ||
991 consumer_data.type == type);
992 consumer_data.type = type;
993
effcf122 994 ctx = zmalloc(sizeof(struct lttng_consumer_local_data));
3bd1e081 995 if (ctx == NULL) {
7a57cf92 996 PERROR("allocating context");
3bd1e081
MD
997 goto error;
998 }
999
1000 ctx->consumer_error_socket = -1;
1001 /* assign the callbacks */
1002 ctx->on_buffer_ready = buffer_ready;
1003 ctx->on_recv_channel = recv_channel;
1004 ctx->on_recv_stream = recv_stream;
1005 ctx->on_update_stream = update_stream;
1006
1007 ret = pipe(ctx->consumer_poll_pipe);
1008 if (ret < 0) {
7a57cf92 1009 PERROR("Error creating poll pipe");
3bd1e081
MD
1010 goto error_poll_pipe;
1011 }
1012
04fdd819
MD
1013 /* set read end of the pipe to non-blocking */
1014 ret = fcntl(ctx->consumer_poll_pipe[0], F_SETFL, O_NONBLOCK);
1015 if (ret < 0) {
7a57cf92 1016 PERROR("fcntl O_NONBLOCK");
04fdd819
MD
1017 goto error_poll_fcntl;
1018 }
1019
1020 /* set write end of the pipe to non-blocking */
1021 ret = fcntl(ctx->consumer_poll_pipe[1], F_SETFL, O_NONBLOCK);
1022 if (ret < 0) {
7a57cf92 1023 PERROR("fcntl O_NONBLOCK");
04fdd819
MD
1024 goto error_poll_fcntl;
1025 }
1026
3bd1e081
MD
1027 ret = pipe(ctx->consumer_should_quit);
1028 if (ret < 0) {
7a57cf92 1029 PERROR("Error creating recv pipe");
3bd1e081
MD
1030 goto error_quit_pipe;
1031 }
1032
1033 ret = pipe(ctx->consumer_thread_pipe);
1034 if (ret < 0) {
7a57cf92 1035 PERROR("Error creating thread pipe");
3bd1e081
MD
1036 goto error_thread_pipe;
1037 }
1038
fb3a43a9
DG
1039 ret = utils_create_pipe(ctx->consumer_metadata_pipe);
1040 if (ret < 0) {
1041 goto error_metadata_pipe;
1042 }
3bd1e081 1043
fb3a43a9
DG
1044 ret = utils_create_pipe(ctx->consumer_splice_metadata_pipe);
1045 if (ret < 0) {
1046 goto error_splice_pipe;
1047 }
1048
1049 return ctx;
3bd1e081 1050
fb3a43a9
DG
1051error_splice_pipe:
1052 utils_close_pipe(ctx->consumer_metadata_pipe);
1053error_metadata_pipe:
1054 utils_close_pipe(ctx->consumer_thread_pipe);
3bd1e081
MD
1055error_thread_pipe:
1056 for (i = 0; i < 2; i++) {
1057 int err;
1058
1059 err = close(ctx->consumer_should_quit[i]);
4c462e79
MD
1060 if (err) {
1061 PERROR("close");
1062 }
3bd1e081 1063 }
04fdd819 1064error_poll_fcntl:
3bd1e081
MD
1065error_quit_pipe:
1066 for (i = 0; i < 2; i++) {
1067 int err;
1068
1069 err = close(ctx->consumer_poll_pipe[i]);
4c462e79
MD
1070 if (err) {
1071 PERROR("close");
1072 }
3bd1e081
MD
1073 }
1074error_poll_pipe:
1075 free(ctx);
1076error:
1077 return NULL;
1078}
1079
1080/*
1081 * Close all fds associated with the instance and free the context.
1082 */
1083void lttng_consumer_destroy(struct lttng_consumer_local_data *ctx)
1084{
4c462e79
MD
1085 int ret;
1086
1087 ret = close(ctx->consumer_error_socket);
1088 if (ret) {
1089 PERROR("close");
1090 }
1091 ret = close(ctx->consumer_thread_pipe[0]);
1092 if (ret) {
1093 PERROR("close");
1094 }
1095 ret = close(ctx->consumer_thread_pipe[1]);
1096 if (ret) {
1097 PERROR("close");
1098 }
1099 ret = close(ctx->consumer_poll_pipe[0]);
1100 if (ret) {
1101 PERROR("close");
1102 }
1103 ret = close(ctx->consumer_poll_pipe[1]);
1104 if (ret) {
1105 PERROR("close");
1106 }
1107 ret = close(ctx->consumer_should_quit[0]);
1108 if (ret) {
1109 PERROR("close");
1110 }
1111 ret = close(ctx->consumer_should_quit[1]);
1112 if (ret) {
1113 PERROR("close");
1114 }
fb3a43a9
DG
1115 utils_close_pipe(ctx->consumer_splice_metadata_pipe);
1116
3bd1e081
MD
1117 unlink(ctx->consumer_command_sock_path);
1118 free(ctx);
1119}
1120
6197aea7
DG
1121/*
1122 * Write the metadata stream id on the specified file descriptor.
1123 */
1124static int write_relayd_metadata_id(int fd,
1125 struct lttng_consumer_stream *stream,
1d4dfdef
DG
1126 struct consumer_relayd_sock_pair *relayd,
1127 unsigned long padding)
6197aea7
DG
1128{
1129 int ret;
1d4dfdef 1130 struct lttcomm_relayd_metadata_payload hdr;
6197aea7 1131
1d4dfdef
DG
1132 hdr.stream_id = htobe64(stream->relayd_stream_id);
1133 hdr.padding_size = htobe32(padding);
6197aea7 1134 do {
1d4dfdef 1135 ret = write(fd, (void *) &hdr, sizeof(hdr));
6197aea7
DG
1136 } while (ret < 0 && errno == EINTR);
1137 if (ret < 0) {
1138 PERROR("write metadata stream id");
1139 goto end;
1140 }
1d4dfdef
DG
1141 DBG("Metadata stream id %" PRIu64 " with padding %lu written before data",
1142 stream->relayd_stream_id, padding);
6197aea7
DG
1143
1144end:
1145 return ret;
1146}
1147
3bd1e081 1148/*
09e26845
DG
1149 * Mmap the ring buffer, read it and write the data to the tracefile. This is a
1150 * core function for writing trace buffers to either the local filesystem or
1151 * the network.
1152 *
1153 * Careful review MUST be put if any changes occur!
3bd1e081
MD
1154 *
1155 * Returns the number of bytes written
1156 */
4078b776 1157ssize_t lttng_consumer_on_read_subbuffer_mmap(
3bd1e081 1158 struct lttng_consumer_local_data *ctx,
1d4dfdef
DG
1159 struct lttng_consumer_stream *stream, unsigned long len,
1160 unsigned long padding)
3bd1e081 1161{
f02e1e8a
DG
1162 unsigned long mmap_offset;
1163 ssize_t ret = 0, written = 0;
1164 off_t orig_offset = stream->out_fd_offset;
1165 /* Default is on the disk */
1166 int outfd = stream->out_fd;
f02e1e8a
DG
1167 struct consumer_relayd_sock_pair *relayd = NULL;
1168
1169 /* RCU lock for the relayd pointer */
1170 rcu_read_lock();
1171
1172 /* Flag that the current stream if set for network streaming. */
1173 if (stream->net_seq_idx != -1) {
1174 relayd = consumer_find_relayd(stream->net_seq_idx);
1175 if (relayd == NULL) {
1176 goto end;
1177 }
1178 }
1179
1180 /* get the offset inside the fd to mmap */
3bd1e081
MD
1181 switch (consumer_data.type) {
1182 case LTTNG_CONSUMER_KERNEL:
f02e1e8a
DG
1183 ret = kernctl_get_mmap_read_offset(stream->wait_fd, &mmap_offset);
1184 break;
7753dea8
MD
1185 case LTTNG_CONSUMER32_UST:
1186 case LTTNG_CONSUMER64_UST:
f02e1e8a
DG
1187 ret = lttng_ustctl_get_mmap_read_offset(stream->chan->handle,
1188 stream->buf, &mmap_offset);
1189 break;
3bd1e081
MD
1190 default:
1191 ERR("Unknown consumer_data type");
1192 assert(0);
1193 }
f02e1e8a
DG
1194 if (ret != 0) {
1195 errno = -ret;
1196 PERROR("tracer ctl get_mmap_read_offset");
1197 written = ret;
1198 goto end;
1199 }
b9182dd9 1200
f02e1e8a
DG
1201 /* Handle stream on the relayd if the output is on the network */
1202 if (relayd) {
1203 unsigned long netlen = len;
1204
1205 /*
1206 * Lock the control socket for the complete duration of the function
1207 * since from this point on we will use the socket.
1208 */
1209 if (stream->metadata_flag) {
1210 /* Metadata requires the control socket. */
1211 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
1d4dfdef 1212 netlen += sizeof(struct lttcomm_relayd_metadata_payload);
f02e1e8a
DG
1213 }
1214
1d4dfdef 1215 ret = write_relayd_stream_header(stream, netlen, padding, relayd);
f02e1e8a
DG
1216 if (ret >= 0) {
1217 /* Use the returned socket. */
1218 outfd = ret;
1219
1220 /* Write metadata stream id before payload */
1221 if (stream->metadata_flag) {
1d4dfdef 1222 ret = write_relayd_metadata_id(outfd, stream, relayd, padding);
f02e1e8a 1223 if (ret < 0) {
f02e1e8a
DG
1224 written = ret;
1225 goto end;
1226 }
f02e1e8a
DG
1227 }
1228 }
1229 /* Else, use the default set before which is the filesystem. */
1d4dfdef
DG
1230 } else {
1231 /* No streaming, we have to set the len with the full padding */
1232 len += padding;
f02e1e8a
DG
1233 }
1234
1235 while (len > 0) {
1236 do {
1237 ret = write(outfd, stream->mmap_base + mmap_offset, len);
1238 } while (ret < 0 && errno == EINTR);
1d4dfdef 1239 DBG("Consumer mmap write() ret %zd (len %lu)", ret, len);
f02e1e8a
DG
1240 if (ret < 0) {
1241 PERROR("Error in file write");
1242 if (written == 0) {
1243 written = ret;
1244 }
1245 goto end;
1246 } else if (ret > len) {
77c7c900 1247 PERROR("Error in file write (ret %zd > len %lu)", ret, len);
f02e1e8a
DG
1248 written += ret;
1249 goto end;
1250 } else {
1251 len -= ret;
1252 mmap_offset += ret;
1253 }
f02e1e8a
DG
1254
1255 /* This call is useless on a socket so better save a syscall. */
1256 if (!relayd) {
1257 /* This won't block, but will start writeout asynchronously */
1258 lttng_sync_file_range(outfd, stream->out_fd_offset, ret,
1259 SYNC_FILE_RANGE_WRITE);
1260 stream->out_fd_offset += ret;
1261 }
1262 written += ret;
1263 }
1264 lttng_consumer_sync_trace_file(stream, orig_offset);
1265
1266end:
1267 /* Unlock only if ctrl socket used */
1268 if (relayd && stream->metadata_flag) {
1269 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
1270 }
1271
1272 rcu_read_unlock();
1273 return written;
3bd1e081
MD
1274}
1275
1276/*
1277 * Splice the data from the ring buffer to the tracefile.
1278 *
1279 * Returns the number of bytes spliced.
1280 */
4078b776 1281ssize_t lttng_consumer_on_read_subbuffer_splice(
3bd1e081 1282 struct lttng_consumer_local_data *ctx,
1d4dfdef
DG
1283 struct lttng_consumer_stream *stream, unsigned long len,
1284 unsigned long padding)
3bd1e081 1285{
f02e1e8a
DG
1286 ssize_t ret = 0, written = 0, ret_splice = 0;
1287 loff_t offset = 0;
1288 off_t orig_offset = stream->out_fd_offset;
1289 int fd = stream->wait_fd;
1290 /* Default is on the disk */
1291 int outfd = stream->out_fd;
f02e1e8a 1292 struct consumer_relayd_sock_pair *relayd = NULL;
fb3a43a9 1293 int *splice_pipe;
f02e1e8a 1294
3bd1e081
MD
1295 switch (consumer_data.type) {
1296 case LTTNG_CONSUMER_KERNEL:
f02e1e8a 1297 break;
7753dea8
MD
1298 case LTTNG_CONSUMER32_UST:
1299 case LTTNG_CONSUMER64_UST:
f02e1e8a 1300 /* Not supported for user space tracing */
3bd1e081
MD
1301 return -ENOSYS;
1302 default:
1303 ERR("Unknown consumer_data type");
1304 assert(0);
3bd1e081
MD
1305 }
1306
f02e1e8a
DG
1307 /* RCU lock for the relayd pointer */
1308 rcu_read_lock();
1309
1310 /* Flag that the current stream if set for network streaming. */
1311 if (stream->net_seq_idx != -1) {
1312 relayd = consumer_find_relayd(stream->net_seq_idx);
1313 if (relayd == NULL) {
1314 goto end;
1315 }
1316 }
1317
fb3a43a9
DG
1318 /*
1319 * Choose right pipe for splice. Metadata and trace data are handled by
1320 * different threads hence the use of two pipes in order not to race or
1321 * corrupt the written data.
1322 */
1323 if (stream->metadata_flag) {
1324 splice_pipe = ctx->consumer_splice_metadata_pipe;
1325 } else {
1326 splice_pipe = ctx->consumer_thread_pipe;
1327 }
1328
f02e1e8a 1329 /* Write metadata stream id before payload */
1d4dfdef
DG
1330 if (relayd) {
1331 int total_len = len;
f02e1e8a 1332
1d4dfdef
DG
1333 if (stream->metadata_flag) {
1334 /*
1335 * Lock the control socket for the complete duration of the function
1336 * since from this point on we will use the socket.
1337 */
1338 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
1339
1340 ret = write_relayd_metadata_id(splice_pipe[1], stream, relayd,
1341 padding);
1342 if (ret < 0) {
1343 written = ret;
1344 goto end;
1345 }
1346
1347 total_len += sizeof(struct lttcomm_relayd_metadata_payload);
1348 }
1349
1350 ret = write_relayd_stream_header(stream, total_len, padding, relayd);
1351 if (ret >= 0) {
1352 /* Use the returned socket. */
1353 outfd = ret;
1354 } else {
1355 ERR("Remote relayd disconnected. Stopping");
f02e1e8a
DG
1356 goto end;
1357 }
1d4dfdef
DG
1358 } else {
1359 /* No streaming, we have to set the len with the full padding */
1360 len += padding;
f02e1e8a
DG
1361 }
1362
1363 while (len > 0) {
1d4dfdef
DG
1364 DBG("splice chan to pipe offset %lu of len %lu (fd : %d, pipe: %d)",
1365 (unsigned long)offset, len, fd, splice_pipe[1]);
fb3a43a9 1366 ret_splice = splice(fd, &offset, splice_pipe[1], NULL, len,
f02e1e8a
DG
1367 SPLICE_F_MOVE | SPLICE_F_MORE);
1368 DBG("splice chan to pipe, ret %zd", ret_splice);
1369 if (ret_splice < 0) {
1370 PERROR("Error in relay splice");
1371 if (written == 0) {
1372 written = ret_splice;
1373 }
1374 ret = errno;
1375 goto splice_error;
1376 }
1377
1378 /* Handle stream on the relayd if the output is on the network */
1379 if (relayd) {
1380 if (stream->metadata_flag) {
1d4dfdef
DG
1381 size_t metadata_payload_size =
1382 sizeof(struct lttcomm_relayd_metadata_payload);
1383
f02e1e8a 1384 /* Update counter to fit the spliced data */
1d4dfdef
DG
1385 ret_splice += metadata_payload_size;
1386 len += metadata_payload_size;
f02e1e8a
DG
1387 /*
1388 * We do this so the return value can match the len passed as
1389 * argument to this function.
1390 */
1d4dfdef 1391 written -= metadata_payload_size;
f02e1e8a
DG
1392 }
1393 }
1394
1395 /* Splice data out */
fb3a43a9 1396 ret_splice = splice(splice_pipe[0], NULL, outfd, NULL,
f02e1e8a 1397 ret_splice, SPLICE_F_MOVE | SPLICE_F_MORE);
1d4dfdef 1398 DBG("Consumer splice pipe to file, ret %zd", ret_splice);
f02e1e8a
DG
1399 if (ret_splice < 0) {
1400 PERROR("Error in file splice");
1401 if (written == 0) {
1402 written = ret_splice;
1403 }
1404 ret = errno;
1405 goto splice_error;
1406 } else if (ret_splice > len) {
1407 errno = EINVAL;
1408 PERROR("Wrote more data than requested %zd (len: %lu)",
1409 ret_splice, len);
1410 written += ret_splice;
1411 ret = errno;
1412 goto splice_error;
1413 }
1414 len -= ret_splice;
1415
1416 /* This call is useless on a socket so better save a syscall. */
1417 if (!relayd) {
1418 /* This won't block, but will start writeout asynchronously */
1419 lttng_sync_file_range(outfd, stream->out_fd_offset, ret_splice,
1420 SYNC_FILE_RANGE_WRITE);
1421 stream->out_fd_offset += ret_splice;
1422 }
1423 written += ret_splice;
1424 }
1425 lttng_consumer_sync_trace_file(stream, orig_offset);
1426
1427 ret = ret_splice;
1428
1429 goto end;
1430
1431splice_error:
1432 /* send the appropriate error description to sessiond */
1433 switch (ret) {
1434 case EBADF:
f73fabfd 1435 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_SPLICE_EBADF);
f02e1e8a
DG
1436 break;
1437 case EINVAL:
f73fabfd 1438 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_SPLICE_EINVAL);
f02e1e8a
DG
1439 break;
1440 case ENOMEM:
f73fabfd 1441 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_SPLICE_ENOMEM);
f02e1e8a
DG
1442 break;
1443 case ESPIPE:
f73fabfd 1444 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_SPLICE_ESPIPE);
f02e1e8a
DG
1445 break;
1446 }
1447
1448end:
1449 if (relayd && stream->metadata_flag) {
1450 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
1451 }
1452
1453 rcu_read_unlock();
1454 return written;
3bd1e081
MD
1455}
1456
1457/*
1458 * Take a snapshot for a specific fd
1459 *
1460 * Returns 0 on success, < 0 on error
1461 */
1462int lttng_consumer_take_snapshot(struct lttng_consumer_local_data *ctx,
1463 struct lttng_consumer_stream *stream)
1464{
1465 switch (consumer_data.type) {
1466 case LTTNG_CONSUMER_KERNEL:
1467 return lttng_kconsumer_take_snapshot(ctx, stream);
7753dea8
MD
1468 case LTTNG_CONSUMER32_UST:
1469 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
1470 return lttng_ustconsumer_take_snapshot(ctx, stream);
1471 default:
1472 ERR("Unknown consumer_data type");
1473 assert(0);
1474 return -ENOSYS;
1475 }
1476
1477}
1478
1479/*
1480 * Get the produced position
1481 *
1482 * Returns 0 on success, < 0 on error
1483 */
1484int lttng_consumer_get_produced_snapshot(
1485 struct lttng_consumer_local_data *ctx,
1486 struct lttng_consumer_stream *stream,
1487 unsigned long *pos)
1488{
1489 switch (consumer_data.type) {
1490 case LTTNG_CONSUMER_KERNEL:
1491 return lttng_kconsumer_get_produced_snapshot(ctx, stream, pos);
7753dea8
MD
1492 case LTTNG_CONSUMER32_UST:
1493 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
1494 return lttng_ustconsumer_get_produced_snapshot(ctx, stream, pos);
1495 default:
1496 ERR("Unknown consumer_data type");
1497 assert(0);
1498 return -ENOSYS;
1499 }
1500}
1501
1502int lttng_consumer_recv_cmd(struct lttng_consumer_local_data *ctx,
1503 int sock, struct pollfd *consumer_sockpoll)
1504{
1505 switch (consumer_data.type) {
1506 case LTTNG_CONSUMER_KERNEL:
1507 return lttng_kconsumer_recv_cmd(ctx, sock, consumer_sockpoll);
7753dea8
MD
1508 case LTTNG_CONSUMER32_UST:
1509 case LTTNG_CONSUMER64_UST:
3bd1e081
MD
1510 return lttng_ustconsumer_recv_cmd(ctx, sock, consumer_sockpoll);
1511 default:
1512 ERR("Unknown consumer_data type");
1513 assert(0);
1514 return -ENOSYS;
1515 }
1516}
1517
fb3a43a9 1518/*
f724d81e 1519 * Iterate over all streams of the hashtable and free them properly.
fb3a43a9
DG
1520 */
1521static void destroy_stream_ht(struct lttng_ht *ht)
1522{
1523 int ret;
1524 struct lttng_ht_iter iter;
1525 struct lttng_consumer_stream *stream;
1526
1527 if (ht == NULL) {
1528 return;
1529 }
1530
d09e1200 1531 rcu_read_lock();
fb3a43a9
DG
1532 cds_lfht_for_each_entry(ht->ht, &iter.iter, stream, node.node) {
1533 ret = lttng_ht_del(ht, &iter);
1534 assert(!ret);
1535
f724d81e 1536 call_rcu(&stream->node.head, consumer_free_stream);
fb3a43a9 1537 }
d09e1200 1538 rcu_read_unlock();
fb3a43a9
DG
1539
1540 lttng_ht_destroy(ht);
1541}
1542
1543/*
1544 * Clean up a metadata stream and free its memory.
1545 */
1546static void consumer_del_metadata_stream(struct lttng_consumer_stream *stream)
1547{
1548 int ret;
fb3a43a9
DG
1549 struct consumer_relayd_sock_pair *relayd;
1550
1551 assert(stream);
1552 /*
1553 * This call should NEVER receive regular stream. It must always be
1554 * metadata stream and this is crucial for data structure synchronization.
1555 */
1556 assert(stream->metadata_flag);
1557
1558 pthread_mutex_lock(&consumer_data.lock);
1559 switch (consumer_data.type) {
1560 case LTTNG_CONSUMER_KERNEL:
1561 if (stream->mmap_base != NULL) {
1562 ret = munmap(stream->mmap_base, stream->mmap_len);
1563 if (ret != 0) {
1564 PERROR("munmap metadata stream");
1565 }
1566 }
1567 break;
1568 case LTTNG_CONSUMER32_UST:
1569 case LTTNG_CONSUMER64_UST:
1570 lttng_ustconsumer_del_stream(stream);
1571 break;
1572 default:
1573 ERR("Unknown consumer_data type");
1574 assert(0);
1575 }
1576 pthread_mutex_unlock(&consumer_data.lock);
1577
1578 if (stream->out_fd >= 0) {
1579 ret = close(stream->out_fd);
1580 if (ret) {
1581 PERROR("close");
1582 }
1583 }
1584
1585 if (stream->wait_fd >= 0 && !stream->wait_fd_is_copy) {
1586 ret = close(stream->wait_fd);
1587 if (ret) {
1588 PERROR("close");
1589 }
1590 }
1591
1592 if (stream->shm_fd >= 0 && stream->wait_fd != stream->shm_fd) {
1593 ret = close(stream->shm_fd);
1594 if (ret) {
1595 PERROR("close");
1596 }
1597 }
1598
1599 /* Check and cleanup relayd */
1600 rcu_read_lock();
1601 relayd = consumer_find_relayd(stream->net_seq_idx);
1602 if (relayd != NULL) {
1603 uatomic_dec(&relayd->refcount);
1604 assert(uatomic_read(&relayd->refcount) >= 0);
1605
1606 /* Closing streams requires to lock the control socket. */
1607 pthread_mutex_lock(&relayd->ctrl_sock_mutex);
1608 ret = relayd_send_close_stream(&relayd->control_sock,
1609 stream->relayd_stream_id, stream->next_net_seq_num - 1);
1610 pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
1611 if (ret < 0) {
1612 DBG("Unable to close stream on the relayd. Continuing");
1613 /*
1614 * Continue here. There is nothing we can do for the relayd.
1615 * Chances are that the relayd has closed the socket so we just
1616 * continue cleaning up.
1617 */
1618 }
1619
1620 /* Both conditions are met, we destroy the relayd. */
1621 if (uatomic_read(&relayd->refcount) == 0 &&
1622 uatomic_read(&relayd->destroy_flag)) {
d09e1200 1623 destroy_relayd(relayd);
fb3a43a9
DG
1624 }
1625 }
1626 rcu_read_unlock();
1627
1628 /* Atomically decrement channel refcount since other threads can use it. */
1629 uatomic_dec(&stream->chan->refcount);
c30aaa51
MD
1630 if (!uatomic_read(&stream->chan->refcount)
1631 && !uatomic_read(&stream->chan->nb_init_streams)) {
1632 /* Go for channel deletion! */
1633 consumer_del_channel(stream->chan);
fb3a43a9
DG
1634 }
1635
f724d81e 1636 call_rcu(&stream->node.head, consumer_free_stream);
fb3a43a9
DG
1637}
1638
1639/*
1640 * Action done with the metadata stream when adding it to the consumer internal
1641 * data structures to handle it.
1642 */
1643static void consumer_add_metadata_stream(struct lttng_consumer_stream *stream)
1644{
1645 struct consumer_relayd_sock_pair *relayd;
1646
1647 /* Find relayd and, if one is found, increment refcount. */
1648 rcu_read_lock();
1649 relayd = consumer_find_relayd(stream->net_seq_idx);
1650 if (relayd != NULL) {
1651 uatomic_inc(&relayd->refcount);
1652 }
1653 rcu_read_unlock();
1654}
1655
1656/*
1657 * Thread polls on metadata file descriptor and write them on disk or on the
1658 * network.
1659 */
1660void *lttng_consumer_thread_poll_metadata(void *data)
1661{
1662 int ret, i, pollfd;
1663 uint32_t revents, nb_fd;
1664 struct lttng_consumer_stream *stream;
1665 struct lttng_ht_iter iter;
1666 struct lttng_ht_node_ulong *node;
1667 struct lttng_ht *metadata_ht = NULL;
1668 struct lttng_poll_event events;
1669 struct lttng_consumer_local_data *ctx = data;
1670 ssize_t len;
1671
1672 rcu_register_thread();
1673
1674 DBG("Thread metadata poll started");
1675
1676 metadata_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
1677 if (metadata_ht == NULL) {
1678 goto end;
1679 }
1680
1681 /* Size is set to 1 for the consumer_metadata pipe */
1682 ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
1683 if (ret < 0) {
1684 ERR("Poll set creation failed");
1685 goto end;
1686 }
1687
1688 ret = lttng_poll_add(&events, ctx->consumer_metadata_pipe[0], LPOLLIN);
1689 if (ret < 0) {
1690 goto end;
1691 }
1692
1693 /* Main loop */
1694 DBG("Metadata main loop started");
1695
1696 while (1) {
1697 lttng_poll_reset(&events);
1698
1699 nb_fd = LTTNG_POLL_GETNB(&events);
1700
1701 /* Only the metadata pipe is set */
1702 if (nb_fd == 0 && consumer_quit == 1) {
1703 goto end;
1704 }
1705
1706restart:
1707 DBG("Metadata poll wait with %d fd(s)", nb_fd);
1708 ret = lttng_poll_wait(&events, -1);
1709 DBG("Metadata event catched in thread");
1710 if (ret < 0) {
1711 if (errno == EINTR) {
1712 goto restart;
1713 }
1714 goto error;
1715 }
1716
1717 for (i = 0; i < nb_fd; i++) {
1718 revents = LTTNG_POLL_GETEV(&events, i);
1719 pollfd = LTTNG_POLL_GETFD(&events, i);
1720
1721 /* Check the metadata pipe for incoming metadata. */
1722 if (pollfd == ctx->consumer_metadata_pipe[0]) {
4adabd61 1723 if (revents & (LPOLLERR | LPOLLHUP )) {
fb3a43a9
DG
1724 DBG("Metadata thread pipe hung up");
1725 /*
1726 * Remove the pipe from the poll set and continue the loop
1727 * since their might be data to consume.
1728 */
1729 lttng_poll_del(&events, ctx->consumer_metadata_pipe[0]);
1730 close(ctx->consumer_metadata_pipe[0]);
1731 continue;
1732 } else if (revents & LPOLLIN) {
fb3a43a9 1733 do {
633d0084
DG
1734 /* Get the stream pointer received */
1735 ret = read(pollfd, &stream, sizeof(stream));
fb3a43a9 1736 } while (ret < 0 && errno == EINTR);
633d0084
DG
1737 if (ret < 0 ||
1738 ret < sizeof(struct lttng_consumer_stream *)) {
fb3a43a9 1739 PERROR("read metadata stream");
fb3a43a9
DG
1740 /*
1741 * Let's continue here and hope we can still work
1742 * without stopping the consumer. XXX: Should we?
1743 */
1744 continue;
1745 }
1746
1747 DBG("Adding metadata stream %d to poll set",
1748 stream->wait_fd);
1749
d09e1200 1750 rcu_read_lock();
fb3a43a9
DG
1751 /* The node should be init at this point */
1752 lttng_ht_add_unique_ulong(metadata_ht,
1753 &stream->waitfd_node);
d09e1200 1754 rcu_read_unlock();
fb3a43a9
DG
1755
1756 /* Add metadata stream to the global poll events list */
1757 lttng_poll_add(&events, stream->wait_fd,
1758 LPOLLIN | LPOLLPRI);
1759
1760 consumer_add_metadata_stream(stream);
1761 }
1762
1763 /* Metadata pipe handled. Continue handling the others */
1764 continue;
1765 }
1766
1767 /* From here, the event is a metadata wait fd */
1768
d09e1200 1769 rcu_read_lock();
fb3a43a9
DG
1770 lttng_ht_lookup(metadata_ht, (void *)((unsigned long) pollfd),
1771 &iter);
1772 node = lttng_ht_iter_get_node_ulong(&iter);
1773 if (node == NULL) {
1774 /* FD not found, continue loop */
d09e1200 1775 rcu_read_unlock();
fb3a43a9
DG
1776 continue;
1777 }
1778
1779 stream = caa_container_of(node, struct lttng_consumer_stream,
1780 waitfd_node);
1781
1782 /* Get the data out of the metadata file descriptor */
1783 if (revents & (LPOLLIN | LPOLLPRI)) {
1784 DBG("Metadata available on fd %d", pollfd);
1785 assert(stream->wait_fd == pollfd);
1786
1787 len = ctx->on_buffer_ready(stream, ctx);
1788 /* It's ok to have an unavailable sub-buffer */
1789 if (len < 0 && len != -EAGAIN) {
d09e1200 1790 rcu_read_unlock();
fb3a43a9
DG
1791 goto end;
1792 } else if (len > 0) {
1793 stream->data_read = 1;
1794 }
1795 }
1796
1797 /*
1798 * Remove the stream from the hash table since there is no data
1799 * left on the fd because we previously did a read on the buffer.
1800 */
4adabd61 1801 if (revents & (LPOLLERR | LPOLLHUP)) {
fb3a43a9
DG
1802 DBG("Metadata fd %d is hup|err|nval.", pollfd);
1803 if (!stream->hangup_flush_done
1804 && (consumer_data.type == LTTNG_CONSUMER32_UST
1805 || consumer_data.type == LTTNG_CONSUMER64_UST)) {
1806 DBG("Attempting to flush and consume the UST buffers");
1807 lttng_ustconsumer_on_stream_hangup(stream);
1808
1809 /* We just flushed the stream now read it. */
1810 len = ctx->on_buffer_ready(stream, ctx);
1811 /* It's ok to have an unavailable sub-buffer */
1812 if (len < 0 && len != -EAGAIN) {
d09e1200 1813 rcu_read_unlock();
fb3a43a9
DG
1814 goto end;
1815 }
1816 }
1817
1818 /* Removing it from hash table, poll set and free memory */
1819 lttng_ht_del(metadata_ht, &iter);
d09e1200 1820
fb3a43a9
DG
1821 lttng_poll_del(&events, stream->wait_fd);
1822 consumer_del_metadata_stream(stream);
1823 }
d09e1200 1824 rcu_read_unlock();
fb3a43a9
DG
1825 }
1826 }
1827
1828error:
1829end:
1830 DBG("Metadata poll thread exiting");
1831 lttng_poll_clean(&events);
1832
1833 if (metadata_ht) {
1834 destroy_stream_ht(metadata_ht);
1835 }
1836
1837 rcu_unregister_thread();
1838 return NULL;
1839}
1840
3bd1e081 1841/*
e4421fec 1842 * This thread polls the fds in the set to consume the data and write
3bd1e081
MD
1843 * it to tracefile if necessary.
1844 */
1845void *lttng_consumer_thread_poll_fds(void *data)
1846{
1847 int num_rdy, num_hup, high_prio, ret, i;
1848 struct pollfd *pollfd = NULL;
1849 /* local view of the streams */
1850 struct lttng_consumer_stream **local_stream = NULL;
1851 /* local view of consumer_data.fds_count */
1852 int nb_fd = 0;
3bd1e081 1853 struct lttng_consumer_local_data *ctx = data;
00e2e675 1854 ssize_t len;
fb3a43a9
DG
1855 pthread_t metadata_thread;
1856 void *status;
3bd1e081 1857
e7b994a3
DG
1858 rcu_register_thread();
1859
fb3a43a9
DG
1860 /* Start metadata polling thread */
1861 ret = pthread_create(&metadata_thread, NULL,
1862 lttng_consumer_thread_poll_metadata, (void *) ctx);
1863 if (ret < 0) {
1864 PERROR("pthread_create metadata thread");
1865 goto end;
1866 }
1867
effcf122 1868 local_stream = zmalloc(sizeof(struct lttng_consumer_stream));
3bd1e081
MD
1869
1870 while (1) {
1871 high_prio = 0;
1872 num_hup = 0;
1873
1874 /*
e4421fec 1875 * the fds set has been updated, we need to update our
3bd1e081
MD
1876 * local array as well
1877 */
1878 pthread_mutex_lock(&consumer_data.lock);
1879 if (consumer_data.need_update) {
1880 if (pollfd != NULL) {
1881 free(pollfd);
1882 pollfd = NULL;
1883 }
1884 if (local_stream != NULL) {
1885 free(local_stream);
1886 local_stream = NULL;
1887 }
1888
1889 /* allocate for all fds + 1 for the consumer_poll_pipe */
effcf122 1890 pollfd = zmalloc((consumer_data.stream_count + 1) * sizeof(struct pollfd));
3bd1e081 1891 if (pollfd == NULL) {
7a57cf92 1892 PERROR("pollfd malloc");
3bd1e081
MD
1893 pthread_mutex_unlock(&consumer_data.lock);
1894 goto end;
1895 }
1896
1897 /* allocate for all fds + 1 for the consumer_poll_pipe */
effcf122 1898 local_stream = zmalloc((consumer_data.stream_count + 1) *
3bd1e081
MD
1899 sizeof(struct lttng_consumer_stream));
1900 if (local_stream == NULL) {
7a57cf92 1901 PERROR("local_stream malloc");
3bd1e081
MD
1902 pthread_mutex_unlock(&consumer_data.lock);
1903 goto end;
1904 }
fb3a43a9 1905 ret = consumer_update_poll_array(ctx, &pollfd, local_stream);
3bd1e081
MD
1906 if (ret < 0) {
1907 ERR("Error in allocating pollfd or local_outfds");
f73fabfd 1908 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_POLL_ERROR);
3bd1e081
MD
1909 pthread_mutex_unlock(&consumer_data.lock);
1910 goto end;
1911 }
1912 nb_fd = ret;
1913 consumer_data.need_update = 0;
1914 }
1915 pthread_mutex_unlock(&consumer_data.lock);
1916
4078b776
MD
1917 /* No FDs and consumer_quit, consumer_cleanup the thread */
1918 if (nb_fd == 0 && consumer_quit == 1) {
1919 goto end;
1920 }
3bd1e081 1921 /* poll on the array of fds */
88f2b785 1922 restart:
3bd1e081
MD
1923 DBG("polling on %d fd", nb_fd + 1);
1924 num_rdy = poll(pollfd, nb_fd + 1, consumer_poll_timeout);
1925 DBG("poll num_rdy : %d", num_rdy);
1926 if (num_rdy == -1) {
88f2b785
MD
1927 /*
1928 * Restart interrupted system call.
1929 */
1930 if (errno == EINTR) {
1931 goto restart;
1932 }
7a57cf92 1933 PERROR("Poll error");
f73fabfd 1934 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_POLL_ERROR);
3bd1e081
MD
1935 goto end;
1936 } else if (num_rdy == 0) {
1937 DBG("Polling thread timed out");
1938 goto end;
1939 }
1940
3bd1e081 1941 /*
00e2e675
DG
1942 * If the consumer_poll_pipe triggered poll go directly to the
1943 * beginning of the loop to update the array. We want to prioritize
1944 * array update over low-priority reads.
3bd1e081 1945 */
509bb1cf 1946 if (pollfd[nb_fd].revents & (POLLIN | POLLPRI)) {
04fdd819
MD
1947 size_t pipe_readlen;
1948 char tmp;
1949
3bd1e081 1950 DBG("consumer_poll_pipe wake up");
04fdd819
MD
1951 /* Consume 1 byte of pipe data */
1952 do {
1953 pipe_readlen = read(ctx->consumer_poll_pipe[0], &tmp, 1);
1954 } while (pipe_readlen == -1 && errno == EINTR);
3bd1e081
MD
1955 continue;
1956 }
1957
1958 /* Take care of high priority channels first. */
1959 for (i = 0; i < nb_fd; i++) {
fb3a43a9 1960 if (pollfd[i].revents & POLLPRI) {
d41f73b7
MD
1961 DBG("Urgent read on fd %d", pollfd[i].fd);
1962 high_prio = 1;
4078b776 1963 len = ctx->on_buffer_ready(local_stream[i], ctx);
d41f73b7 1964 /* it's ok to have an unavailable sub-buffer */
4078b776
MD
1965 if (len < 0 && len != -EAGAIN) {
1966 goto end;
1967 } else if (len > 0) {
1968 local_stream[i]->data_read = 1;
d41f73b7 1969 }
3bd1e081
MD
1970 }
1971 }
1972
4078b776
MD
1973 /*
1974 * If we read high prio channel in this loop, try again
1975 * for more high prio data.
1976 */
1977 if (high_prio) {
3bd1e081
MD
1978 continue;
1979 }
1980
1981 /* Take care of low priority channels. */
4078b776
MD
1982 for (i = 0; i < nb_fd; i++) {
1983 if ((pollfd[i].revents & POLLIN) ||
1984 local_stream[i]->hangup_flush_done) {
4078b776
MD
1985 DBG("Normal read on fd %d", pollfd[i].fd);
1986 len = ctx->on_buffer_ready(local_stream[i], ctx);
1987 /* it's ok to have an unavailable sub-buffer */
1988 if (len < 0 && len != -EAGAIN) {
1989 goto end;
1990 } else if (len > 0) {
1991 local_stream[i]->data_read = 1;
1992 }
1993 }
1994 }
1995
1996 /* Handle hangup and errors */
1997 for (i = 0; i < nb_fd; i++) {
1998 if (!local_stream[i]->hangup_flush_done
1999 && (pollfd[i].revents & (POLLHUP | POLLERR | POLLNVAL))
2000 && (consumer_data.type == LTTNG_CONSUMER32_UST
2001 || consumer_data.type == LTTNG_CONSUMER64_UST)) {
2002 DBG("fd %d is hup|err|nval. Attempting flush and read.",
2003 pollfd[i].fd);
2004 lttng_ustconsumer_on_stream_hangup(local_stream[i]);
2005 /* Attempt read again, for the data we just flushed. */
2006 local_stream[i]->data_read = 1;
2007 }
2008 /*
2009 * If the poll flag is HUP/ERR/NVAL and we have
2010 * read no data in this pass, we can remove the
2011 * stream from its hash table.
2012 */
2013 if ((pollfd[i].revents & POLLHUP)) {
2014 DBG("Polling fd %d tells it has hung up.", pollfd[i].fd);
2015 if (!local_stream[i]->data_read) {
702b1ea4 2016 consumer_del_stream(local_stream[i]);
4078b776
MD
2017 num_hup++;
2018 }
2019 } else if (pollfd[i].revents & POLLERR) {
2020 ERR("Error returned in polling fd %d.", pollfd[i].fd);
2021 if (!local_stream[i]->data_read) {
702b1ea4 2022 consumer_del_stream(local_stream[i]);
4078b776
MD
2023 num_hup++;
2024 }
2025 } else if (pollfd[i].revents & POLLNVAL) {
2026 ERR("Polling fd %d tells fd is not open.", pollfd[i].fd);
2027 if (!local_stream[i]->data_read) {
702b1ea4 2028 consumer_del_stream(local_stream[i]);
4078b776 2029 num_hup++;
3bd1e081
MD
2030 }
2031 }
4078b776 2032 local_stream[i]->data_read = 0;
3bd1e081
MD
2033 }
2034 }
2035end:
2036 DBG("polling thread exiting");
2037 if (pollfd != NULL) {
2038 free(pollfd);
2039 pollfd = NULL;
2040 }
2041 if (local_stream != NULL) {
2042 free(local_stream);
2043 local_stream = NULL;
2044 }
fb3a43a9
DG
2045
2046 /*
2047 * Close the write side of the pipe so epoll_wait() in
2048 * lttng_consumer_thread_poll_metadata can catch it. The thread is
2049 * monitoring the read side of the pipe. If we close them both, epoll_wait
2050 * strangely does not return and could create a endless wait period if the
2051 * pipe is the only tracked fd in the poll set. The thread will take care
2052 * of closing the read side.
2053 */
2054 close(ctx->consumer_metadata_pipe[1]);
2055 if (ret) {
2056 ret = pthread_join(metadata_thread, &status);
2057 if (ret < 0) {
2058 PERROR("pthread_join metadata thread");
2059 }
2060 }
2061
e7b994a3 2062 rcu_unregister_thread();
3bd1e081
MD
2063 return NULL;
2064}
2065
2066/*
2067 * This thread listens on the consumerd socket and receives the file
2068 * descriptors from the session daemon.
2069 */
2070void *lttng_consumer_thread_receive_fds(void *data)
2071{
2072 int sock, client_socket, ret;
2073 /*
2074 * structure to poll for incoming data on communication socket avoids
2075 * making blocking sockets.
2076 */
2077 struct pollfd consumer_sockpoll[2];
2078 struct lttng_consumer_local_data *ctx = data;
2079
e7b994a3
DG
2080 rcu_register_thread();
2081
3bd1e081
MD
2082 DBG("Creating command socket %s", ctx->consumer_command_sock_path);
2083 unlink(ctx->consumer_command_sock_path);
2084 client_socket = lttcomm_create_unix_sock(ctx->consumer_command_sock_path);
2085 if (client_socket < 0) {
2086 ERR("Cannot create command socket");
2087 goto end;
2088 }
2089
2090 ret = lttcomm_listen_unix_sock(client_socket);
2091 if (ret < 0) {
2092 goto end;
2093 }
2094
32258573 2095 DBG("Sending ready command to lttng-sessiond");
f73fabfd 2096 ret = lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_COMMAND_SOCK_READY);
3bd1e081
MD
2097 /* return < 0 on error, but == 0 is not fatal */
2098 if (ret < 0) {
32258573 2099 ERR("Error sending ready command to lttng-sessiond");
3bd1e081
MD
2100 goto end;
2101 }
2102
2103 ret = fcntl(client_socket, F_SETFL, O_NONBLOCK);
2104 if (ret < 0) {
7a57cf92 2105 PERROR("fcntl O_NONBLOCK");
3bd1e081
MD
2106 goto end;
2107 }
2108
2109 /* prepare the FDs to poll : to client socket and the should_quit pipe */
2110 consumer_sockpoll[0].fd = ctx->consumer_should_quit[0];
2111 consumer_sockpoll[0].events = POLLIN | POLLPRI;
2112 consumer_sockpoll[1].fd = client_socket;
2113 consumer_sockpoll[1].events = POLLIN | POLLPRI;
2114
2115 if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
2116 goto end;
2117 }
2118 DBG("Connection on client_socket");
2119
2120 /* Blocking call, waiting for transmission */
2121 sock = lttcomm_accept_unix_sock(client_socket);
2122 if (sock <= 0) {
2123 WARN("On accept");
2124 goto end;
2125 }
2126 ret = fcntl(sock, F_SETFL, O_NONBLOCK);
2127 if (ret < 0) {
7a57cf92 2128 PERROR("fcntl O_NONBLOCK");
3bd1e081
MD
2129 goto end;
2130 }
2131
2132 /* update the polling structure to poll on the established socket */
2133 consumer_sockpoll[1].fd = sock;
2134 consumer_sockpoll[1].events = POLLIN | POLLPRI;
2135
2136 while (1) {
2137 if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
2138 goto end;
2139 }
2140 DBG("Incoming command on sock");
2141 ret = lttng_consumer_recv_cmd(ctx, sock, consumer_sockpoll);
2142 if (ret == -ENOENT) {
2143 DBG("Received STOP command");
2144 goto end;
2145 }
4cbc1a04
DG
2146 if (ret <= 0) {
2147 /*
2148 * This could simply be a session daemon quitting. Don't output
2149 * ERR() here.
2150 */
2151 DBG("Communication interrupted on command socket");
3bd1e081
MD
2152 goto end;
2153 }
2154 if (consumer_quit) {
2155 DBG("consumer_thread_receive_fds received quit from signal");
2156 goto end;
2157 }
2158 DBG("received fds on sock");
2159 }
2160end:
2161 DBG("consumer_thread_receive_fds exiting");
2162
2163 /*
2164 * when all fds have hung up, the polling thread
2165 * can exit cleanly
2166 */
2167 consumer_quit = 1;
2168
2169 /*
2170 * 2s of grace period, if no polling events occur during
2171 * this period, the polling thread will exit even if there
2172 * are still open FDs (should not happen, but safety mechanism).
2173 */
2174 consumer_poll_timeout = LTTNG_CONSUMER_POLL_TIMEOUT;
2175
04fdd819
MD
2176 /*
2177 * Wake-up the other end by writing a null byte in the pipe
2178 * (non-blocking). Important note: Because writing into the
2179 * pipe is non-blocking (and therefore we allow dropping wakeup
2180 * data, as long as there is wakeup data present in the pipe
2181 * buffer to wake up the other end), the other end should
2182 * perform the following sequence for waiting:
2183 * 1) empty the pipe (reads).
2184 * 2) perform update operation.
2185 * 3) wait on the pipe (poll).
2186 */
2187 do {
2188 ret = write(ctx->consumer_poll_pipe[1], "", 1);
6f94560a 2189 } while (ret < 0 && errno == EINTR);
e7b994a3 2190 rcu_unregister_thread();
3bd1e081
MD
2191 return NULL;
2192}
d41f73b7 2193
4078b776 2194ssize_t lttng_consumer_read_subbuffer(struct lttng_consumer_stream *stream,
d41f73b7
MD
2195 struct lttng_consumer_local_data *ctx)
2196{
2197 switch (consumer_data.type) {
2198 case LTTNG_CONSUMER_KERNEL:
2199 return lttng_kconsumer_read_subbuffer(stream, ctx);
7753dea8
MD
2200 case LTTNG_CONSUMER32_UST:
2201 case LTTNG_CONSUMER64_UST:
d41f73b7
MD
2202 return lttng_ustconsumer_read_subbuffer(stream, ctx);
2203 default:
2204 ERR("Unknown consumer_data type");
2205 assert(0);
2206 return -ENOSYS;
2207 }
2208}
2209
2210int lttng_consumer_on_recv_stream(struct lttng_consumer_stream *stream)
2211{
2212 switch (consumer_data.type) {
2213 case LTTNG_CONSUMER_KERNEL:
2214 return lttng_kconsumer_on_recv_stream(stream);
7753dea8
MD
2215 case LTTNG_CONSUMER32_UST:
2216 case LTTNG_CONSUMER64_UST:
d41f73b7
MD
2217 return lttng_ustconsumer_on_recv_stream(stream);
2218 default:
2219 ERR("Unknown consumer_data type");
2220 assert(0);
2221 return -ENOSYS;
2222 }
2223}
e4421fec
DG
2224
2225/*
2226 * Allocate and set consumer data hash tables.
2227 */
2228void lttng_consumer_init(void)
2229{
2230 consumer_data.stream_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
2231 consumer_data.channel_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
00e2e675 2232 consumer_data.relayd_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
e4421fec 2233}
7735ef9e
DG
2234
2235/*
2236 * Process the ADD_RELAYD command receive by a consumer.
2237 *
2238 * This will create a relayd socket pair and add it to the relayd hash table.
2239 * The caller MUST acquire a RCU read side lock before calling it.
2240 */
2241int consumer_add_relayd_socket(int net_seq_idx, int sock_type,
2242 struct lttng_consumer_local_data *ctx, int sock,
2243 struct pollfd *consumer_sockpoll, struct lttcomm_sock *relayd_sock)
2244{
2245 int fd, ret = -1;
2246 struct consumer_relayd_sock_pair *relayd;
2247
2248 DBG("Consumer adding relayd socket (idx: %d)", net_seq_idx);
2249
2250 /* Get relayd reference if exists. */
2251 relayd = consumer_find_relayd(net_seq_idx);
2252 if (relayd == NULL) {
2253 /* Not found. Allocate one. */
2254 relayd = consumer_allocate_relayd_sock_pair(net_seq_idx);
2255 if (relayd == NULL) {
f73fabfd 2256 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
7735ef9e
DG
2257 goto error;
2258 }
2259 }
2260
2261 /* Poll on consumer socket. */
2262 if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
2263 ret = -EINTR;
2264 goto error;
2265 }
2266
2267 /* Get relayd socket from session daemon */
2268 ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
2269 if (ret != sizeof(fd)) {
f73fabfd 2270 lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
7735ef9e
DG
2271 ret = -1;
2272 goto error;
2273 }
2274
2275 /* Copy socket information and received FD */
2276 switch (sock_type) {
2277 case LTTNG_STREAM_CONTROL:
2278 /* Copy received lttcomm socket */
2279 lttcomm_copy_sock(&relayd->control_sock, relayd_sock);
2280 ret = lttcomm_create_sock(&relayd->control_sock);
2281 if (ret < 0) {
2282 goto error;
2283 }
2284
2285 /* Close the created socket fd which is useless */
2286 close(relayd->control_sock.fd);
2287
2288 /* Assign new file descriptor */
2289 relayd->control_sock.fd = fd;
2290 break;
2291 case LTTNG_STREAM_DATA:
2292 /* Copy received lttcomm socket */
2293 lttcomm_copy_sock(&relayd->data_sock, relayd_sock);
2294 ret = lttcomm_create_sock(&relayd->data_sock);
2295 if (ret < 0) {
2296 goto error;
2297 }
2298
2299 /* Close the created socket fd which is useless */
2300 close(relayd->data_sock.fd);
2301
2302 /* Assign new file descriptor */
2303 relayd->data_sock.fd = fd;
2304 break;
2305 default:
2306 ERR("Unknown relayd socket type (%d)", sock_type);
2307 goto error;
2308 }
2309
2310 DBG("Consumer %s socket created successfully with net idx %d (fd: %d)",
2311 sock_type == LTTNG_STREAM_CONTROL ? "control" : "data",
2312 relayd->net_seq_idx, fd);
2313
2314 /*
2315 * Add relayd socket pair to consumer data hashtable. If object already
2316 * exists or on error, the function gracefully returns.
2317 */
d09e1200 2318 add_relayd(relayd);
7735ef9e
DG
2319
2320 /* All good! */
2321 ret = 0;
2322
2323error:
2324 return ret;
2325}
This page took 0.135306 seconds and 4 git commands to generate.