relayd: add extra debug output
[lttng-tools.git] / src / bin / lttng-relayd / stream.c
1 /*
2 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
3 * David Goulet <dgoulet@efficios.com>
4 * 2015 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 * 2019 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License, version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 51
18 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21 #define _LGPL_SOURCE
22 #include <common/common.h>
23 #include <common/utils.h>
24 #include <common/defaults.h>
25 #include <common/sessiond-comm/relayd.h>
26 #include <urcu/rculist.h>
27 #include <sys/stat.h>
28
29 #include "lttng-relayd.h"
30 #include "index.h"
31 #include "stream.h"
32 #include "viewer-stream.h"
33
34 #include <sys/types.h>
35 #include <fcntl.h>
36
37 #define FILE_IO_STACK_BUFFER_SIZE 65536
38
39 /* Should be called with RCU read-side lock held. */
40 bool stream_get(struct relay_stream *stream)
41 {
42 return urcu_ref_get_unless_zero(&stream->ref);
43 }
44
45 /*
46 * Get stream from stream id from the streams hash table. Return stream
47 * if found else NULL. A stream reference is taken when a stream is
48 * returned. stream_put() must be called on that stream.
49 */
50 struct relay_stream *stream_get_by_id(uint64_t stream_id)
51 {
52 struct lttng_ht_node_u64 *node;
53 struct lttng_ht_iter iter;
54 struct relay_stream *stream = NULL;
55
56 rcu_read_lock();
57 lttng_ht_lookup(relay_streams_ht, &stream_id, &iter);
58 node = lttng_ht_iter_get_node_u64(&iter);
59 if (!node) {
60 DBG("Relay stream %" PRIu64 " not found", stream_id);
61 goto end;
62 }
63 stream = caa_container_of(node, struct relay_stream, node);
64 if (!stream_get(stream)) {
65 stream = NULL;
66 }
67 end:
68 rcu_read_unlock();
69 return stream;
70 }
71
72 static void stream_complete_rotation(struct relay_stream *stream)
73 {
74 DBG("Rotation completed for stream %" PRIu64, stream->stream_handle);
75 lttng_trace_chunk_put(stream->trace_chunk);
76 stream->trace_chunk = stream->ongoing_rotation.value.next_trace_chunk;
77 stream->ongoing_rotation = (typeof(stream->ongoing_rotation)) {};
78 }
79
80 static int stream_create_data_output_file_from_trace_chunk(
81 struct relay_stream *stream,
82 struct lttng_trace_chunk *trace_chunk,
83 bool force_unlink,
84 struct stream_fd **out_stream_fd)
85 {
86 int ret, fd;
87 char stream_path[LTTNG_PATH_MAX];
88 enum lttng_trace_chunk_status status;
89 const int flags = O_RDWR | O_CREAT | O_TRUNC;
90 const mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
91
92 ASSERT_LOCKED(stream->lock);
93
94 ret = utils_stream_file_path(stream->path_name, stream->channel_name,
95 stream->tracefile_size, stream->tracefile_current_index,
96 NULL, stream_path, sizeof(stream_path));
97 if (ret < 0) {
98 goto end;
99 }
100
101 if (stream->tracefile_wrapped_around || force_unlink) {
102 /*
103 * The on-disk ring-buffer has wrapped around.
104 * Newly created stream files will replace existing files. Since
105 * live clients may be consuming existing files, the file about
106 * to be replaced is unlinked in order to not overwrite its
107 * content.
108 */
109 status = lttng_trace_chunk_unlink_file(trace_chunk,
110 stream_path);
111 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
112 PERROR("Failed to unlink stream file \"%s\" during trace file rotation",
113 stream_path);
114 /*
115 * Don't abort if the file doesn't exist, it is
116 * unexpected, but should not be a fatal error.
117 */
118 if (errno != ENOENT) {
119 ret = -1;
120 goto end;
121 }
122 }
123 }
124
125 status = lttng_trace_chunk_open_file(
126 trace_chunk, stream_path, flags, mode, &fd, false);
127 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
128 ERR("Failed to open stream file \"%s\"", stream->channel_name);
129 ret = -1;
130 goto end;
131 }
132
133 *out_stream_fd = stream_fd_create(fd);
134 if (!*out_stream_fd) {
135 if (close(ret)) {
136 PERROR("Error closing stream file descriptor %d", ret);
137 }
138 ret = -1;
139 goto end;
140 }
141 end:
142 return ret;
143 }
144
145 static int stream_rotate_data_file(struct relay_stream *stream)
146 {
147 int ret = 0;
148
149 DBG("Rotating stream %" PRIu64 " data file with size %" PRIu64,
150 stream->stream_handle, stream->tracefile_size_current);
151
152 if (stream->stream_fd) {
153 stream_fd_put(stream->stream_fd);
154 stream->stream_fd = NULL;
155 }
156
157 stream->tracefile_wrapped_around = false;
158 stream->tracefile_current_index = 0;
159
160 if (stream->ongoing_rotation.value.next_trace_chunk) {
161 struct stream_fd *new_stream_fd = NULL;
162 enum lttng_trace_chunk_status chunk_status;
163
164 chunk_status = lttng_trace_chunk_create_subdirectory(
165 stream->ongoing_rotation.value.next_trace_chunk,
166 stream->path_name);
167 if (chunk_status != LTTNG_TRACE_CHUNK_STATUS_OK) {
168 ret = -1;
169 goto end;
170 }
171
172 /* Rotate the data file. */
173 ret = stream_create_data_output_file_from_trace_chunk(stream,
174 stream->ongoing_rotation.value.next_trace_chunk,
175 false, &new_stream_fd);
176 stream->stream_fd = new_stream_fd;
177 if (ret < 0) {
178 ERR("Failed to rotate stream data file");
179 goto end;
180 }
181 }
182 DBG("%s: reset tracefile_size_current for stream %" PRIu64 " was %" PRIu64,
183 __func__, stream->stream_handle, stream->tracefile_size_current);
184 stream->tracefile_size_current = 0;
185 stream->pos_after_last_complete_data_index = 0;
186 stream->ongoing_rotation.value.data_rotated = true;
187
188 if (stream->ongoing_rotation.value.index_rotated) {
189 /* Rotation completed; reset its state. */
190 stream_complete_rotation(stream);
191 }
192 end:
193 return ret;
194 }
195
196 /*
197 * If too much data has been written in a tracefile before we received the
198 * rotation command, we have to move the excess data to the new tracefile and
199 * perform the rotation. This can happen because the control and data
200 * connections are separate, the indexes as well as the commands arrive from
201 * the control connection and we have no control over the order so we could be
202 * in a situation where too much data has been received on the data connection
203 * before the rotation command on the control connection arrives.
204 */
205 static int rotate_truncate_stream(struct relay_stream *stream)
206 {
207 int ret;
208 off_t lseek_ret, previous_stream_copy_origin;
209 uint64_t copy_bytes_left, misplaced_data_size;
210 bool acquired_reference;
211 struct stream_fd *previous_stream_fd = NULL;
212 struct lttng_trace_chunk *previous_chunk = NULL;
213
214 if (!LTTNG_OPTIONAL_GET(stream->ongoing_rotation).next_trace_chunk) {
215 ERR("Protocol error encoutered in %s(): stream rotation "
216 "sequence number is before the current sequence number "
217 "and the next trace chunk is unset. Honoring this "
218 "rotation command would result in data loss",
219 __FUNCTION__);
220 ret = -1;
221 goto end;
222 }
223
224 ASSERT_LOCKED(stream->lock);
225 /*
226 * Acquire a reference to the current trace chunk to ensure
227 * it is not reclaimed when `stream_rotate_data_file` is called.
228 * Failing to do so would violate the contract of the trace
229 * chunk API as an active file descriptor would outlive the
230 * trace chunk.
231 */
232 acquired_reference = lttng_trace_chunk_get(stream->trace_chunk);
233 assert(acquired_reference);
234 previous_chunk = stream->trace_chunk;
235
236 /*
237 * Steal the stream's reference to its stream_fd. A new
238 * stream_fd will be created when the rotation completes and
239 * the orinal stream_fd will be used to copy the "extra" data
240 * to the new file.
241 */
242 assert(stream->stream_fd);
243 previous_stream_fd = stream->stream_fd;
244 stream->stream_fd = NULL;
245
246 assert(!stream->is_metadata);
247 assert(stream->tracefile_size_current >
248 stream->pos_after_last_complete_data_index);
249 misplaced_data_size = stream->tracefile_size_current -
250 stream->pos_after_last_complete_data_index;
251 copy_bytes_left = misplaced_data_size;
252 previous_stream_copy_origin = stream->pos_after_last_complete_data_index;
253
254 ret = stream_rotate_data_file(stream);
255 if (ret) {
256 goto end;
257 }
258
259 assert(stream->stream_fd);
260 /*
261 * Seek the current tracefile to the position at which the rotation
262 * should have occurred.
263 */
264 lseek_ret = lseek(previous_stream_fd->fd, previous_stream_copy_origin,
265 SEEK_SET);
266 if (lseek_ret < 0) {
267 PERROR("Failed to seek to offset %" PRIu64
268 " while copying extra data received before a stream rotation",
269 (uint64_t) previous_stream_copy_origin);
270 ret = -1;
271 goto end;
272 }
273
274 /* Move data from the old file to the new file. */
275 while (copy_bytes_left) {
276 ssize_t io_ret;
277 char copy_buffer[FILE_IO_STACK_BUFFER_SIZE];
278 const off_t copy_size_this_pass = min_t(
279 off_t, copy_bytes_left, sizeof(copy_buffer));
280
281 io_ret = lttng_read(previous_stream_fd->fd, copy_buffer,
282 copy_size_this_pass);
283 if (io_ret < (ssize_t) copy_size_this_pass) {
284 if (io_ret == -1) {
285 PERROR("Failed to read %" PRIu64
286 " bytes from fd %i in %s(), returned %zi",
287 copy_size_this_pass,
288 previous_stream_fd->fd,
289 __FUNCTION__, io_ret);
290 } else {
291 ERR("Failed to read %" PRIu64
292 " bytes from fd %i in %s(), returned %zi",
293 copy_size_this_pass,
294 previous_stream_fd->fd,
295 __FUNCTION__, io_ret);
296 }
297 ret = -1;
298 goto end;
299 }
300
301 io_ret = lttng_write(stream->stream_fd->fd, copy_buffer,
302 copy_size_this_pass);
303 if (io_ret < (ssize_t) copy_size_this_pass) {
304 if (io_ret == -1) {
305 PERROR("Failed to write %" PRIu64
306 " bytes from fd %i in %s(), returned %zi",
307 copy_size_this_pass,
308 stream->stream_fd->fd,
309 __FUNCTION__, io_ret);
310 } else {
311 ERR("Failed to write %" PRIu64
312 " bytes from fd %i in %s(), returned %zi",
313 copy_size_this_pass,
314 stream->stream_fd->fd,
315 __FUNCTION__, io_ret);
316 }
317 ret = -1;
318 goto end;
319 }
320 copy_bytes_left -= copy_size_this_pass;
321 }
322
323 /* Truncate the file to get rid of the excess data. */
324 ret = ftruncate(previous_stream_fd->fd, previous_stream_copy_origin);
325 if (ret) {
326 PERROR("Failed to truncate current stream file to offset %" PRIu64,
327 previous_stream_copy_origin);
328 goto end;
329 }
330
331 /*
332 * Update the offset and FD of all the eventual indexes created by the
333 * data connection before the rotation command arrived.
334 */
335 ret = relay_index_switch_all_files(stream);
336 if (ret < 0) {
337 ERR("Failed to rotate index file");
338 goto end;
339 }
340
341 stream->tracefile_size_current = misplaced_data_size;
342 /* Index and data contents are back in sync. */
343 stream->pos_after_last_complete_data_index = 0;
344 ret = 0;
345 end:
346 lttng_trace_chunk_put(previous_chunk);
347 stream_fd_put(previous_stream_fd);
348 return ret;
349 }
350
351 /*
352 * Check if a stream's data file (as opposed to index) should be rotated
353 * (for session rotation).
354 * Must be called with the stream lock held.
355 *
356 * Return 0 on success, a negative value on error.
357 */
358 static int try_rotate_stream_data(struct relay_stream *stream)
359 {
360 int ret = 0;
361
362 if (caa_likely(!stream->ongoing_rotation.is_set)) {
363 /* No rotation expected. */
364 goto end;
365 }
366
367 if (stream->ongoing_rotation.value.data_rotated) {
368 /* Rotation of the data file has already occurred. */
369 goto end;
370 }
371
372 DBG("%s: Stream %" PRIu64
373 " (rotate_at_index_packet_seq_num = %" PRIu64
374 ", rotate_at_prev_data_net_seq = %" PRIu64
375 ", prev_data_seq = %" PRIu64 ")",
376 __func__, stream->stream_handle,
377 stream->ongoing_rotation.value.packet_seq_num,
378 stream->ongoing_rotation.value.prev_data_net_seq,
379 stream->prev_data_seq);
380
381 if (stream->prev_data_seq == -1ULL ||
382 stream->ongoing_rotation.value.prev_data_net_seq == -1ULL ||
383 stream->prev_data_seq <
384 stream->ongoing_rotation.value.prev_data_net_seq) {
385 /*
386 * The next packet that will be written is not part of the next
387 * chunk yet.
388 */
389 DBG("Stream %" PRIu64 " data not yet ready for rotation "
390 "(rotate_at_index_packet_seq_num = %" PRIu64
391 ", rotate_at_prev_data_net_seq = %" PRIu64
392 ", prev_data_seq = %" PRIu64 ")",
393 stream->stream_handle,
394 stream->ongoing_rotation.value.packet_seq_num,
395 stream->ongoing_rotation.value.prev_data_net_seq,
396 stream->prev_data_seq);
397 goto end;
398 } else if (stream->prev_data_seq > stream->ongoing_rotation.value.prev_data_net_seq) {
399 /*
400 * prev_data_seq is checked here since indexes and rotation
401 * commands are serialized with respect to each other.
402 */
403 DBG("Rotation after too much data has been written in tracefile "
404 "for stream %" PRIu64 ", need to truncate before "
405 "rotating", stream->stream_handle);
406 ret = rotate_truncate_stream(stream);
407 if (ret) {
408 ERR("Failed to truncate stream");
409 goto end;
410 }
411 } else {
412 ret = stream_rotate_data_file(stream);
413 }
414
415 end:
416 return ret;
417 }
418
419 /*
420 * Close the current index file if it is open, and create a new one.
421 *
422 * Return 0 on success, -1 on error.
423 */
424 static int create_index_file(struct relay_stream *stream,
425 struct lttng_trace_chunk *chunk)
426 {
427 int ret;
428 uint32_t major, minor;
429 char *index_subpath = NULL;
430 enum lttng_trace_chunk_status status;
431
432 ASSERT_LOCKED(stream->lock);
433
434 /* Put ref on previous index_file. */
435 if (stream->index_file) {
436 lttng_index_file_put(stream->index_file);
437 stream->index_file = NULL;
438 }
439 major = stream->trace->session->major;
440 minor = stream->trace->session->minor;
441
442 if (!chunk) {
443 ret = 0;
444 goto end;
445 }
446 ret = asprintf(&index_subpath, "%s/%s", stream->path_name,
447 DEFAULT_INDEX_DIR);
448 if (ret < 0) {
449 goto end;
450 }
451
452 status = lttng_trace_chunk_create_subdirectory(chunk,
453 index_subpath);
454 free(index_subpath);
455 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
456 ret = -1;
457 goto end;
458 }
459 status = lttng_index_file_create_from_trace_chunk(
460 chunk, stream->path_name,
461 stream->channel_name, stream->tracefile_size,
462 stream->tracefile_current_index,
463 lttng_to_index_major(major, minor),
464 lttng_to_index_minor(major, minor), true,
465 &stream->index_file);
466 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
467 ret = -1;
468 goto end;
469 }
470
471 ret = 0;
472
473 end:
474 return ret;
475 }
476
477 /*
478 * Check if a stream's index file should be rotated (for session rotation).
479 * Must be called with the stream lock held.
480 *
481 * Return 0 on success, a negative value on error.
482 */
483 static int try_rotate_stream_index(struct relay_stream *stream)
484 {
485 int ret = 0;
486
487 if (!stream->ongoing_rotation.is_set) {
488 /* No rotation expected. */
489 goto end;
490 }
491
492 if (stream->ongoing_rotation.value.index_rotated) {
493 /* Rotation of the index has already occurred. */
494 goto end;
495 }
496
497 DBG("%s: Stream %" PRIu64
498 " (rotate_at_packet_seq_num = %" PRIu64
499 ", received_packet_seq_num = "
500 "(value = %" PRIu64 ", is_set = %" PRIu8 "))",
501 __func__, stream->stream_handle,
502 stream->ongoing_rotation.value.packet_seq_num,
503 stream->received_packet_seq_num.value,
504 stream->received_packet_seq_num.is_set);
505
506 if (!stream->received_packet_seq_num.is_set ||
507 LTTNG_OPTIONAL_GET(stream->received_packet_seq_num) + 1 <
508 stream->ongoing_rotation.value.packet_seq_num) {
509 DBG("Stream %" PRIu64 " index not yet ready for rotation "
510 "(rotate_at_packet_seq_num = %" PRIu64
511 ", received_packet_seq_num = "
512 "(value = %" PRIu64 ", is_set = %" PRIu8 "))",
513 stream->stream_handle,
514 stream->ongoing_rotation.value.packet_seq_num,
515 stream->received_packet_seq_num.value,
516 stream->received_packet_seq_num.is_set);
517 goto end;
518 } else {
519 /*
520 * The next index belongs to the new trace chunk; rotate.
521 * In overwrite mode, the packet seq num may jump over the
522 * rotation position.
523 */
524 assert(LTTNG_OPTIONAL_GET(stream->received_packet_seq_num) + 1 >=
525 stream->ongoing_rotation.value.packet_seq_num);
526 DBG("Rotating stream %" PRIu64 " index file",
527 stream->stream_handle);
528 ret = create_index_file(stream,
529 stream->ongoing_rotation.value.next_trace_chunk);
530 stream->ongoing_rotation.value.index_rotated = true;
531
532 /*
533 * Set the rotation pivot position for the data, now that we have the
534 * net_seq_num matching the packet_seq_num index pivot position.
535 */
536 stream->ongoing_rotation.value.prev_data_net_seq =
537 stream->prev_index_seq;
538 if (stream->ongoing_rotation.value.data_rotated &&
539 stream->ongoing_rotation.value.index_rotated) {
540 /* Rotation completed; reset its state. */
541 DBG("Rotation completed for stream %" PRIu64,
542 stream->stream_handle);
543 stream_complete_rotation(stream);
544 }
545 }
546
547 end:
548 return ret;
549 }
550
551 static int stream_set_trace_chunk(struct relay_stream *stream,
552 struct lttng_trace_chunk *chunk)
553 {
554 int ret = 0;
555 enum lttng_trace_chunk_status status;
556 bool acquired_reference;
557 struct stream_fd *new_stream_fd = NULL;
558
559 status = lttng_trace_chunk_create_subdirectory(chunk,
560 stream->path_name);
561 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
562 ret = -1;
563 goto end;
564 }
565
566 lttng_trace_chunk_put(stream->trace_chunk);
567 acquired_reference = lttng_trace_chunk_get(chunk);
568 assert(acquired_reference);
569 stream->trace_chunk = chunk;
570
571 if (stream->stream_fd) {
572 stream_fd_put(stream->stream_fd);
573 stream->stream_fd = NULL;
574 }
575 ret = stream_create_data_output_file_from_trace_chunk(stream, chunk,
576 false, &new_stream_fd);
577 stream->stream_fd = new_stream_fd;
578 end:
579 return ret;
580 }
581
582 /*
583 * We keep ownership of path_name and channel_name.
584 */
585 struct relay_stream *stream_create(struct ctf_trace *trace,
586 uint64_t stream_handle, char *path_name,
587 char *channel_name, uint64_t tracefile_size,
588 uint64_t tracefile_count)
589 {
590 int ret;
591 struct relay_stream *stream = NULL;
592 struct relay_session *session = trace->session;
593 bool acquired_reference = false;
594 struct lttng_trace_chunk *current_trace_chunk;
595
596 stream = zmalloc(sizeof(struct relay_stream));
597 if (stream == NULL) {
598 PERROR("relay stream zmalloc");
599 goto error_no_alloc;
600 }
601
602 stream->stream_handle = stream_handle;
603 stream->prev_data_seq = -1ULL;
604 stream->prev_index_seq = -1ULL;
605 stream->last_net_seq_num = -1ULL;
606 stream->ctf_stream_id = -1ULL;
607 stream->tracefile_size = tracefile_size;
608 stream->tracefile_count = tracefile_count;
609 stream->path_name = path_name;
610 stream->channel_name = channel_name;
611 stream->beacon_ts_end = -1ULL;
612 lttng_ht_node_init_u64(&stream->node, stream->stream_handle);
613 pthread_mutex_init(&stream->lock, NULL);
614 urcu_ref_init(&stream->ref);
615 ctf_trace_get(trace);
616 stream->trace = trace;
617
618 pthread_mutex_lock(&trace->session->lock);
619 current_trace_chunk = trace->session->current_trace_chunk;
620 if (current_trace_chunk) {
621 acquired_reference = lttng_trace_chunk_get(current_trace_chunk);
622 }
623 pthread_mutex_unlock(&trace->session->lock);
624 if (!acquired_reference) {
625 ERR("Cannot create stream for channel \"%s\" as a reference to the session's current trace chunk could not be acquired",
626 channel_name);
627 ret = -1;
628 goto end;
629 }
630
631 stream->indexes_ht = lttng_ht_new(0, LTTNG_HT_TYPE_U64);
632 if (!stream->indexes_ht) {
633 ERR("Cannot created indexes_ht");
634 ret = -1;
635 goto end;
636 }
637
638 pthread_mutex_lock(&stream->lock);
639 ret = stream_set_trace_chunk(stream, current_trace_chunk);
640 pthread_mutex_unlock(&stream->lock);
641 if (ret) {
642 ERR("Failed to set the current trace chunk of session \"%s\" on newly created stream of channel \"%s\"",
643 trace->session->session_name,
644 stream->channel_name);
645 ret = -1;
646 goto end;
647 }
648 stream->tfa = tracefile_array_create(stream->tracefile_count);
649 if (!stream->tfa) {
650 ret = -1;
651 goto end;
652 }
653
654 stream->is_metadata = !strcmp(stream->channel_name,
655 DEFAULT_METADATA_NAME);
656 stream->in_recv_list = true;
657
658 /*
659 * Add the stream in the recv list of the session. Once the end stream
660 * message is received, all session streams are published.
661 */
662 pthread_mutex_lock(&session->recv_list_lock);
663 cds_list_add_rcu(&stream->recv_node, &session->recv_list);
664 session->stream_count++;
665 pthread_mutex_unlock(&session->recv_list_lock);
666
667 /*
668 * Both in the ctf_trace object and the global stream ht since the data
669 * side of the relayd does not have the concept of session.
670 */
671 lttng_ht_add_unique_u64(relay_streams_ht, &stream->node);
672 stream->in_stream_ht = true;
673
674 DBG("Relay new stream added %s with ID %" PRIu64, stream->channel_name,
675 stream->stream_handle);
676 ret = 0;
677
678 end:
679 if (ret) {
680 if (stream->stream_fd) {
681 stream_fd_put(stream->stream_fd);
682 stream->stream_fd = NULL;
683 }
684 stream_put(stream);
685 stream = NULL;
686 }
687 if (acquired_reference) {
688 lttng_trace_chunk_put(current_trace_chunk);
689 }
690 return stream;
691
692 error_no_alloc:
693 /*
694 * path_name and channel_name need to be freed explicitly here
695 * because we cannot rely on stream_put().
696 */
697 free(path_name);
698 free(channel_name);
699 return NULL;
700 }
701
702 /*
703 * Called with the session lock held.
704 */
705 void stream_publish(struct relay_stream *stream)
706 {
707 struct relay_session *session;
708
709 pthread_mutex_lock(&stream->lock);
710 if (stream->published) {
711 goto unlock;
712 }
713
714 session = stream->trace->session;
715
716 pthread_mutex_lock(&session->recv_list_lock);
717 if (stream->in_recv_list) {
718 cds_list_del_rcu(&stream->recv_node);
719 stream->in_recv_list = false;
720 }
721 pthread_mutex_unlock(&session->recv_list_lock);
722
723 pthread_mutex_lock(&stream->trace->stream_list_lock);
724 cds_list_add_rcu(&stream->stream_node, &stream->trace->stream_list);
725 pthread_mutex_unlock(&stream->trace->stream_list_lock);
726
727 stream->published = true;
728 unlock:
729 pthread_mutex_unlock(&stream->lock);
730 }
731
732 /*
733 * Stream must be protected by holding the stream lock or by virtue of being
734 * called from stream_destroy.
735 */
736 static void stream_unpublish(struct relay_stream *stream)
737 {
738 if (stream->in_stream_ht) {
739 struct lttng_ht_iter iter;
740 int ret;
741
742 iter.iter.node = &stream->node.node;
743 ret = lttng_ht_del(relay_streams_ht, &iter);
744 assert(!ret);
745 stream->in_stream_ht = false;
746 }
747 if (stream->published) {
748 pthread_mutex_lock(&stream->trace->stream_list_lock);
749 cds_list_del_rcu(&stream->stream_node);
750 pthread_mutex_unlock(&stream->trace->stream_list_lock);
751 stream->published = false;
752 }
753 }
754
755 static void stream_destroy(struct relay_stream *stream)
756 {
757 if (stream->indexes_ht) {
758 /*
759 * Calling lttng_ht_destroy in call_rcu worker thread so
760 * we don't hold the RCU read-side lock while calling
761 * it.
762 */
763 lttng_ht_destroy(stream->indexes_ht);
764 }
765 if (stream->tfa) {
766 tracefile_array_destroy(stream->tfa);
767 }
768 free(stream->path_name);
769 free(stream->channel_name);
770 free(stream);
771 }
772
773 static void stream_destroy_rcu(struct rcu_head *rcu_head)
774 {
775 struct relay_stream *stream =
776 caa_container_of(rcu_head, struct relay_stream, rcu_node);
777
778 stream_destroy(stream);
779 }
780
781 /*
782 * No need to take stream->lock since this is only called on the final
783 * stream_put which ensures that a single thread may act on the stream.
784 */
785 static void stream_release(struct urcu_ref *ref)
786 {
787 struct relay_stream *stream =
788 caa_container_of(ref, struct relay_stream, ref);
789 struct relay_session *session;
790
791 session = stream->trace->session;
792
793 DBG("Releasing stream id %" PRIu64, stream->stream_handle);
794
795 pthread_mutex_lock(&session->recv_list_lock);
796 session->stream_count--;
797 if (stream->in_recv_list) {
798 cds_list_del_rcu(&stream->recv_node);
799 stream->in_recv_list = false;
800 }
801 pthread_mutex_unlock(&session->recv_list_lock);
802
803 stream_unpublish(stream);
804
805 if (stream->stream_fd) {
806 stream_fd_put(stream->stream_fd);
807 stream->stream_fd = NULL;
808 }
809 if (stream->index_file) {
810 lttng_index_file_put(stream->index_file);
811 stream->index_file = NULL;
812 }
813 if (stream->trace) {
814 ctf_trace_put(stream->trace);
815 stream->trace = NULL;
816 }
817 stream_complete_rotation(stream);
818 lttng_trace_chunk_put(stream->trace_chunk);
819 stream->trace_chunk = NULL;
820
821 call_rcu(&stream->rcu_node, stream_destroy_rcu);
822 }
823
824 void stream_put(struct relay_stream *stream)
825 {
826 rcu_read_lock();
827 assert(stream->ref.refcount != 0);
828 /*
829 * Wait until we have processed all the stream packets before
830 * actually putting our last stream reference.
831 */
832 urcu_ref_put(&stream->ref, stream_release);
833 rcu_read_unlock();
834 }
835
836 int stream_set_pending_rotation(struct relay_stream *stream,
837 struct lttng_trace_chunk *next_trace_chunk,
838 uint64_t rotation_sequence_number)
839 {
840 int ret = 0;
841 const struct relay_stream_rotation rotation = {
842 .data_rotated = false,
843 .index_rotated = false,
844 .packet_seq_num = rotation_sequence_number,
845 .prev_data_net_seq = -1ULL,
846 .next_trace_chunk = next_trace_chunk,
847 };
848
849 if (stream->ongoing_rotation.is_set) {
850 ERR("Attempted to set a pending rotation on a stream already being rotated (protocol error)");
851 ret = -1;
852 goto end;
853 }
854
855 if (next_trace_chunk) {
856 const bool reference_acquired =
857 lttng_trace_chunk_get(next_trace_chunk);
858
859 assert(reference_acquired);
860 }
861 LTTNG_OPTIONAL_SET(&stream->ongoing_rotation, rotation);
862
863 DBG("Setting pending rotation: stream_id = %" PRIu64
864 ", rotate_at_packet_seq_num = %" PRIu64,
865 stream->stream_handle, rotation_sequence_number);
866 if (stream->is_metadata) {
867 /*
868 * A metadata stream has no index; consider it already rotated.
869 */
870 stream->ongoing_rotation.value.index_rotated = true;
871 ret = stream_rotate_data_file(stream);
872 } else {
873 ret = try_rotate_stream_index(stream);
874 if (ret < 0) {
875 goto end;
876 }
877
878 ret = try_rotate_stream_data(stream);
879 if (ret < 0) {
880 goto end;
881 }
882 }
883 end:
884 return ret;
885 }
886
887 void try_stream_close(struct relay_stream *stream)
888 {
889 bool session_aborted;
890 struct relay_session *session = stream->trace->session;
891
892 DBG("Trying to close stream %" PRIu64, stream->stream_handle);
893
894 pthread_mutex_lock(&session->lock);
895 session_aborted = session->aborted;
896 pthread_mutex_unlock(&session->lock);
897
898 pthread_mutex_lock(&stream->lock);
899 /*
900 * Can be called concurently by connection close and reception of last
901 * pending data.
902 */
903 if (stream->closed) {
904 pthread_mutex_unlock(&stream->lock);
905 DBG("closing stream %" PRIu64 " aborted since it is already marked as closed", stream->stream_handle);
906 return;
907 }
908
909 stream->close_requested = true;
910
911 if (stream->last_net_seq_num == -1ULL) {
912 /*
913 * Handle connection close without explicit stream close
914 * command.
915 *
916 * We can be clever about indexes partially received in
917 * cases where we received the data socket part, but not
918 * the control socket part: since we're currently closing
919 * the stream on behalf of the control socket, we *know*
920 * there won't be any more control information for this
921 * socket. Therefore, we can destroy all indexes for
922 * which we have received only the file descriptor (from
923 * data socket). This takes care of consumerd crashes
924 * between sending the data and control information for
925 * a packet. Since those are sent in that order, we take
926 * care of consumerd crashes.
927 */
928 DBG("relay_index_close_partial_fd");
929 relay_index_close_partial_fd(stream);
930 /*
931 * Use the highest net_seq_num we currently have pending
932 * As end of stream indicator. Leave last_net_seq_num
933 * at -1ULL if we cannot find any index.
934 */
935 stream->last_net_seq_num = relay_index_find_last(stream);
936 DBG("Updating stream->last_net_seq_num to %" PRIu64, stream->last_net_seq_num);
937 /* Fall-through into the next check. */
938 }
939
940 if (stream->last_net_seq_num != -1ULL &&
941 ((int64_t) (stream->prev_data_seq - stream->last_net_seq_num)) < 0
942 && !session_aborted) {
943 /*
944 * Don't close since we still have data pending. This
945 * handles cases where an explicit close command has
946 * been received for this stream, and cases where the
947 * connection has been closed, and we are awaiting for
948 * index information from the data socket. It is
949 * therefore expected that all the index fd information
950 * we need has already been received on the control
951 * socket. Matching index information from data socket
952 * should be Expected Soon(TM).
953 *
954 * TODO: We should implement a timer to garbage collect
955 * streams after a timeout to be resilient against a
956 * consumerd implementation that would not match this
957 * expected behavior.
958 */
959 pthread_mutex_unlock(&stream->lock);
960 DBG("closing stream %" PRIu64 " aborted since it still has data pending", stream->stream_handle);
961 return;
962 }
963 /*
964 * We received all the indexes we can expect.
965 */
966 stream_unpublish(stream);
967 stream->closed = true;
968 /* Relay indexes are only used by the "consumer/sessiond" end. */
969 relay_index_close_all(stream);
970
971 /*
972 * If we are closed by an application exiting (per-pid buffers),
973 * we need to put our reference on the stream trace chunk right
974 * away, because otherwise still holding the reference on the
975 * trace chunk could allow a viewer stream (which holds a reference
976 * to the stream) to postpone destroy waiting for the chunk to cease
977 * to exist endlessly until the viewer is detached.
978 */
979
980 /* Put stream fd before put chunk. */
981 if (stream->stream_fd) {
982 stream_fd_put(stream->stream_fd);
983 stream->stream_fd = NULL;
984 }
985 if (stream->index_file) {
986 lttng_index_file_put(stream->index_file);
987 stream->index_file = NULL;
988 }
989 lttng_trace_chunk_put(stream->trace_chunk);
990 stream->trace_chunk = NULL;
991 pthread_mutex_unlock(&stream->lock);
992 DBG("Succeeded in closing stream %" PRIu64, stream->stream_handle);
993 stream_put(stream);
994 }
995
996 int stream_init_packet(struct relay_stream *stream, size_t packet_size,
997 bool *file_rotated)
998 {
999 int ret = 0;
1000
1001 ASSERT_LOCKED(stream->lock);
1002
1003 if (!stream->stream_fd || !stream->trace_chunk) {
1004 ERR("Protocol error: received a packet for a stream that doesn't have a current trace chunk: stream_id = %" PRIu64 ", channel_name = %s",
1005 stream->stream_handle, stream->channel_name);
1006 ret = -1;
1007 goto end;
1008 }
1009
1010 if (caa_likely(stream->tracefile_size == 0)) {
1011 /* No size limit set; nothing to check. */
1012 goto end;
1013 }
1014
1015 /*
1016 * Check if writing the new packet would exceed the maximal file size.
1017 */
1018 if (caa_unlikely((stream->tracefile_size_current + packet_size) >
1019 stream->tracefile_size)) {
1020 const uint64_t new_file_index =
1021 (stream->tracefile_current_index + 1) %
1022 stream->tracefile_count;
1023
1024 if (new_file_index < stream->tracefile_current_index) {
1025 stream->tracefile_wrapped_around = true;
1026 }
1027 DBG("New stream packet causes stream file rotation: stream_id = %" PRIu64
1028 ", current_file_size = %" PRIu64
1029 ", packet_size = %zu, current_file_index = %" PRIu64
1030 " new_file_index = %" PRIu64,
1031 stream->stream_handle,
1032 stream->tracefile_size_current, packet_size,
1033 stream->tracefile_current_index, new_file_index);
1034 tracefile_array_file_rotate(stream->tfa, TRACEFILE_ROTATE_WRITE);
1035 stream->tracefile_current_index = new_file_index;
1036
1037 if (stream->stream_fd) {
1038 stream_fd_put(stream->stream_fd);
1039 stream->stream_fd = NULL;
1040 }
1041 ret = stream_create_data_output_file_from_trace_chunk(stream,
1042 stream->trace_chunk, false, &stream->stream_fd);
1043 if (ret) {
1044 ERR("Failed to perform trace file rotation of stream %" PRIu64,
1045 stream->stream_handle);
1046 goto end;
1047 }
1048
1049 /*
1050 * Reset current size because we just performed a stream
1051 * rotation.
1052 */
1053 DBG("%s: reset tracefile_size_current for stream %" PRIu64 " was %" PRIu64,
1054 __func__, stream->stream_handle, stream->tracefile_size_current);
1055 stream->tracefile_size_current = 0;
1056 *file_rotated = true;
1057 } else {
1058 *file_rotated = false;
1059 }
1060 end:
1061 return ret;
1062 }
1063
1064 /* Note that the packet is not necessarily complete. */
1065 int stream_write(struct relay_stream *stream,
1066 const struct lttng_buffer_view *packet, size_t padding_len)
1067 {
1068 int ret = 0;
1069 ssize_t write_ret;
1070 size_t padding_to_write = padding_len;
1071 char padding_buffer[FILE_IO_STACK_BUFFER_SIZE];
1072
1073 ASSERT_LOCKED(stream->lock);
1074 memset(padding_buffer, 0,
1075 min(sizeof(padding_buffer), padding_to_write));
1076
1077 if (!stream->stream_fd || !stream->trace_chunk) {
1078 ERR("Protocol error: received a packet for a stream that doesn't have a current trace chunk: stream_id = %" PRIu64 ", channel_name = %s",
1079 stream->stream_handle, stream->channel_name);
1080 ret = -1;
1081 goto end;
1082 }
1083 if (packet) {
1084 write_ret = lttng_write(stream->stream_fd->fd,
1085 packet->data, packet->size);
1086 if (write_ret != packet->size) {
1087 PERROR("Failed to write to stream file of %sstream %" PRIu64,
1088 stream->is_metadata ? "metadata " : "",
1089 stream->stream_handle);
1090 ret = -1;
1091 goto end;
1092 }
1093 }
1094
1095 while (padding_to_write > 0) {
1096 const size_t padding_to_write_this_pass =
1097 min(padding_to_write, sizeof(padding_buffer));
1098
1099 write_ret = lttng_write(stream->stream_fd->fd,
1100 padding_buffer, padding_to_write_this_pass);
1101 if (write_ret != padding_to_write_this_pass) {
1102 PERROR("Failed to write padding to file of %sstream %" PRIu64,
1103 stream->is_metadata ? "metadata " : "",
1104 stream->stream_handle);
1105 ret = -1;
1106 goto end;
1107 }
1108 padding_to_write -= padding_to_write_this_pass;
1109 }
1110
1111 if (stream->is_metadata) {
1112 size_t recv_len;
1113
1114 recv_len = packet ? packet->size : 0;
1115 recv_len += padding_len;
1116 stream->metadata_received += recv_len;
1117 if (recv_len) {
1118 stream->no_new_metadata_notified = false;
1119 }
1120 }
1121
1122 DBG("Wrote to %sstream %" PRIu64 ": data_length = %zu, padding_length = %zu",
1123 stream->is_metadata ? "metadata " : "",
1124 stream->stream_handle,
1125 packet ? packet->size : (size_t) 0, padding_len);
1126 end:
1127 return ret;
1128 }
1129
1130 /*
1131 * Update index after receiving a packet for a data stream.
1132 *
1133 * Called with the stream lock held.
1134 *
1135 * Return 0 on success else a negative value.
1136 */
1137 int stream_update_index(struct relay_stream *stream, uint64_t net_seq_num,
1138 bool rotate_index, bool *flushed, uint64_t total_size)
1139 {
1140 int ret = 0;
1141 uint64_t data_offset;
1142 struct relay_index *index;
1143
1144 assert(stream->trace_chunk);
1145 ASSERT_LOCKED(stream->lock);
1146 /* Get data offset because we are about to update the index. */
1147 data_offset = htobe64(stream->tracefile_size_current);
1148
1149 DBG("handle_index_data: stream %" PRIu64 " net_seq_num %" PRIu64 " data offset %" PRIu64,
1150 stream->stream_handle, net_seq_num, stream->tracefile_size_current);
1151
1152 /*
1153 * Lookup for an existing index for that stream id/sequence
1154 * number. If it exists, the control thread has already received the
1155 * data for it, thus we need to write it to disk.
1156 */
1157 index = relay_index_get_by_id_or_create(stream, net_seq_num);
1158 if (!index) {
1159 ret = -1;
1160 goto end;
1161 }
1162
1163 if (rotate_index || !stream->index_file) {
1164 ret = create_index_file(stream, stream->trace_chunk);
1165 if (ret) {
1166 ERR("Failed to create index file for stream %" PRIu64,
1167 stream->stream_handle);
1168 /* Put self-ref for this index due to error. */
1169 relay_index_put(index);
1170 index = NULL;
1171 goto end;
1172 }
1173 }
1174
1175 if (relay_index_set_file(index, stream->index_file, data_offset)) {
1176 ret = -1;
1177 /* Put self-ref for this index due to error. */
1178 relay_index_put(index);
1179 index = NULL;
1180 goto end;
1181 }
1182
1183 ret = relay_index_try_flush(index);
1184 if (ret == 0) {
1185 tracefile_array_file_rotate(stream->tfa, TRACEFILE_ROTATE_READ);
1186 tracefile_array_commit_seq(stream->tfa, stream->index_received_seqcount);
1187 stream->index_received_seqcount++;
1188 LTTNG_OPTIONAL_SET(&stream->received_packet_seq_num,
1189 be64toh(index->index_data.packet_seq_num));
1190 *flushed = true;
1191 } else if (ret > 0) {
1192 index->total_size = total_size;
1193 /* No flush. */
1194 ret = 0;
1195 } else {
1196 /*
1197 * ret < 0
1198 *
1199 * relay_index_try_flush is responsible for the self-reference
1200 * put of the index object on error.
1201 */
1202 ERR("relay_index_try_flush error %d", ret);
1203 ret = -1;
1204 }
1205 end:
1206 return ret;
1207 }
1208
1209 int stream_complete_packet(struct relay_stream *stream, size_t packet_total_size,
1210 uint64_t sequence_number, bool index_flushed)
1211 {
1212 int ret = 0;
1213
1214 ASSERT_LOCKED(stream->lock);
1215
1216 stream->tracefile_size_current += packet_total_size;
1217 if (index_flushed) {
1218 stream->pos_after_last_complete_data_index =
1219 stream->tracefile_size_current;
1220 stream->prev_index_seq = sequence_number;
1221 ret = try_rotate_stream_index(stream);
1222 if (ret < 0) {
1223 goto end;
1224 }
1225 }
1226
1227 stream->prev_data_seq = sequence_number;
1228 ret = try_rotate_stream_data(stream);
1229
1230 end:
1231 return ret;
1232 }
1233
1234 int stream_add_index(struct relay_stream *stream,
1235 const struct lttcomm_relayd_index *index_info)
1236 {
1237 int ret = 0;
1238 struct relay_index *index;
1239
1240 ASSERT_LOCKED(stream->lock);
1241
1242 DBG("stream_add_index for stream %" PRIu64, stream->stream_handle);
1243
1244 /* Live beacon handling */
1245 if (index_info->packet_size == 0) {
1246 DBG("Received live beacon for stream %" PRIu64,
1247 stream->stream_handle);
1248
1249 /*
1250 * Only flag a stream inactive when it has already
1251 * received data and no indexes are in flight.
1252 */
1253 if (stream->index_received_seqcount > 0
1254 && stream->indexes_in_flight == 0) {
1255 stream->beacon_ts_end = index_info->timestamp_end;
1256 }
1257 ret = 0;
1258 goto end;
1259 } else {
1260 stream->beacon_ts_end = -1ULL;
1261 }
1262
1263 if (stream->ctf_stream_id == -1ULL) {
1264 stream->ctf_stream_id = index_info->stream_id;
1265 }
1266
1267 index = relay_index_get_by_id_or_create(stream, index_info->net_seq_num);
1268 if (!index) {
1269 ret = -1;
1270 ERR("Failed to get or create index %" PRIu64,
1271 index_info->net_seq_num);
1272 goto end;
1273 }
1274 if (relay_index_set_control_data(index, index_info,
1275 stream->trace->session->minor)) {
1276 ERR("set_index_control_data error");
1277 relay_index_put(index);
1278 ret = -1;
1279 goto end;
1280 }
1281 ret = relay_index_try_flush(index);
1282 if (ret == 0) {
1283 tracefile_array_file_rotate(stream->tfa, TRACEFILE_ROTATE_READ);
1284 tracefile_array_commit_seq(stream->tfa, stream->index_received_seqcount);
1285 stream->index_received_seqcount++;
1286 stream->pos_after_last_complete_data_index += index->total_size;
1287 stream->prev_index_seq = index_info->net_seq_num;
1288 LTTNG_OPTIONAL_SET(&stream->received_packet_seq_num,
1289 index_info->packet_seq_num);
1290
1291 ret = try_rotate_stream_index(stream);
1292 if (ret < 0) {
1293 goto end;
1294 }
1295 ret = try_rotate_stream_data(stream);
1296 if (ret < 0) {
1297 goto end;
1298 }
1299 } else if (ret > 0) {
1300 /* no flush. */
1301 ret = 0;
1302 } else {
1303 /*
1304 * ret < 0
1305 *
1306 * relay_index_try_flush is responsible for the self-reference
1307 * put of the index object on error.
1308 */
1309 ERR("relay_index_try_flush error %d", ret);
1310 ret = -1;
1311 }
1312 end:
1313 return ret;
1314 }
1315
1316 static void print_stream_indexes(struct relay_stream *stream)
1317 {
1318 struct lttng_ht_iter iter;
1319 struct relay_index *index;
1320
1321 rcu_read_lock();
1322 cds_lfht_for_each_entry(stream->indexes_ht->ht, &iter.iter, index,
1323 index_n.node) {
1324 DBG("index %p net_seq_num %" PRIu64 " refcount %ld"
1325 " stream %" PRIu64 " trace %" PRIu64
1326 " session %" PRIu64,
1327 index,
1328 index->index_n.key,
1329 stream->ref.refcount,
1330 index->stream->stream_handle,
1331 index->stream->trace->id,
1332 index->stream->trace->session->id);
1333 }
1334 rcu_read_unlock();
1335 }
1336
1337 int stream_reset_file(struct relay_stream *stream)
1338 {
1339 ASSERT_LOCKED(stream->lock);
1340
1341 if (stream->stream_fd) {
1342 stream_fd_put(stream->stream_fd);
1343 stream->stream_fd = NULL;
1344 }
1345
1346 DBG("%s: reset tracefile_size_current for stream %" PRIu64 " was %" PRIu64,
1347 __func__, stream->stream_handle, stream->tracefile_size_current);
1348 stream->tracefile_size_current = 0;
1349 stream->prev_data_seq = 0;
1350 stream->prev_index_seq = 0;
1351 /* Note that this does not reset the tracefile array. */
1352 stream->tracefile_current_index = 0;
1353 stream->pos_after_last_complete_data_index = 0;
1354
1355 return stream_create_data_output_file_from_trace_chunk(stream,
1356 stream->trace_chunk, true, &stream->stream_fd);
1357 }
1358
1359 void print_relay_streams(void)
1360 {
1361 struct lttng_ht_iter iter;
1362 struct relay_stream *stream;
1363
1364 if (!relay_streams_ht) {
1365 return;
1366 }
1367
1368 rcu_read_lock();
1369 cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, stream,
1370 node.node) {
1371 if (!stream_get(stream)) {
1372 continue;
1373 }
1374 DBG("stream %p refcount %ld stream %" PRIu64 " trace %" PRIu64
1375 " session %" PRIu64,
1376 stream,
1377 stream->ref.refcount,
1378 stream->stream_handle,
1379 stream->trace->id,
1380 stream->trace->session->id);
1381 print_stream_indexes(stream);
1382 stream_put(stream);
1383 }
1384 rcu_read_unlock();
1385 }
This page took 0.096907 seconds and 4 git commands to generate.