Fix: relayd: use packet sequence number for rotation position
[lttng-tools.git] / src / bin / lttng-relayd / stream.c
1 /*
2 * Copyright (C) 2013 - Julien Desfossez <jdesfossez@efficios.com>
3 * David Goulet <dgoulet@efficios.com>
4 * 2015 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 * 2019 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License, version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 51
18 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21 #define _LGPL_SOURCE
22 #include <common/common.h>
23 #include <common/utils.h>
24 #include <common/defaults.h>
25 #include <common/sessiond-comm/relayd.h>
26 #include <urcu/rculist.h>
27 #include <sys/stat.h>
28
29 #include "lttng-relayd.h"
30 #include "index.h"
31 #include "stream.h"
32 #include "viewer-stream.h"
33
34 #include <sys/types.h>
35 #include <fcntl.h>
36
37 #define FILE_IO_STACK_BUFFER_SIZE 65536
38
39 /* Should be called with RCU read-side lock held. */
40 bool stream_get(struct relay_stream *stream)
41 {
42 return urcu_ref_get_unless_zero(&stream->ref);
43 }
44
45 /*
46 * Get stream from stream id from the streams hash table. Return stream
47 * if found else NULL. A stream reference is taken when a stream is
48 * returned. stream_put() must be called on that stream.
49 */
50 struct relay_stream *stream_get_by_id(uint64_t stream_id)
51 {
52 struct lttng_ht_node_u64 *node;
53 struct lttng_ht_iter iter;
54 struct relay_stream *stream = NULL;
55
56 rcu_read_lock();
57 lttng_ht_lookup(relay_streams_ht, &stream_id, &iter);
58 node = lttng_ht_iter_get_node_u64(&iter);
59 if (!node) {
60 DBG("Relay stream %" PRIu64 " not found", stream_id);
61 goto end;
62 }
63 stream = caa_container_of(node, struct relay_stream, node);
64 if (!stream_get(stream)) {
65 stream = NULL;
66 }
67 end:
68 rcu_read_unlock();
69 return stream;
70 }
71
72 static void stream_complete_rotation(struct relay_stream *stream)
73 {
74 DBG("Rotation completed for stream %" PRIu64, stream->stream_handle);
75 lttng_trace_chunk_put(stream->trace_chunk);
76 stream->trace_chunk = stream->ongoing_rotation.value.next_trace_chunk;
77 stream->ongoing_rotation = (typeof(stream->ongoing_rotation)) {};
78 }
79
80 static int stream_create_data_output_file_from_trace_chunk(
81 struct relay_stream *stream,
82 struct lttng_trace_chunk *trace_chunk,
83 bool force_unlink,
84 struct stream_fd **out_stream_fd)
85 {
86 int ret, fd;
87 char stream_path[LTTNG_PATH_MAX];
88 enum lttng_trace_chunk_status status;
89 const int flags = O_RDWR | O_CREAT | O_TRUNC;
90 const mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
91
92 ASSERT_LOCKED(stream->lock);
93
94 ret = utils_stream_file_path(stream->path_name, stream->channel_name,
95 stream->tracefile_size, stream->tracefile_current_index,
96 NULL, stream_path, sizeof(stream_path));
97 if (ret < 0) {
98 goto end;
99 }
100
101 if (stream->tracefile_wrapped_around || force_unlink) {
102 /*
103 * The on-disk ring-buffer has wrapped around.
104 * Newly created stream files will replace existing files. Since
105 * live clients may be consuming existing files, the file about
106 * to be replaced is unlinked in order to not overwrite its
107 * content.
108 */
109 status = lttng_trace_chunk_unlink_file(trace_chunk,
110 stream_path);
111 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
112 PERROR("Failed to unlink stream file \"%s\" during trace file rotation",
113 stream_path);
114 /*
115 * Don't abort if the file doesn't exist, it is
116 * unexpected, but should not be a fatal error.
117 */
118 if (errno != ENOENT) {
119 ret = -1;
120 goto end;
121 }
122 }
123 }
124
125 status = lttng_trace_chunk_open_file(
126 trace_chunk, stream_path, flags, mode, &fd);
127 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
128 ERR("Failed to open stream file \"%s\"", stream->channel_name);
129 ret = -1;
130 goto end;
131 }
132
133 *out_stream_fd = stream_fd_create(fd);
134 if (!*out_stream_fd) {
135 if (close(ret)) {
136 PERROR("Error closing stream file descriptor %d", ret);
137 }
138 ret = -1;
139 goto end;
140 }
141 end:
142 return ret;
143 }
144
145 static int stream_rotate_data_file(struct relay_stream *stream)
146 {
147 int ret = 0;
148
149 DBG("Rotating stream %" PRIu64 " data file",
150 stream->stream_handle);
151
152 if (stream->stream_fd) {
153 stream_fd_put(stream->stream_fd);
154 stream->stream_fd = NULL;
155 }
156
157 stream->tracefile_wrapped_around = false;
158 stream->tracefile_current_index = 0;
159
160 if (stream->ongoing_rotation.value.next_trace_chunk) {
161 struct stream_fd *new_stream_fd = NULL;
162 enum lttng_trace_chunk_status chunk_status;
163
164 chunk_status = lttng_trace_chunk_create_subdirectory(
165 stream->ongoing_rotation.value.next_trace_chunk,
166 stream->path_name);
167 if (chunk_status != LTTNG_TRACE_CHUNK_STATUS_OK) {
168 ret = -1;
169 goto end;
170 }
171
172 /* Rotate the data file. */
173 ret = stream_create_data_output_file_from_trace_chunk(stream,
174 stream->ongoing_rotation.value.next_trace_chunk,
175 false, &new_stream_fd);
176 stream->stream_fd = new_stream_fd;
177 if (ret < 0) {
178 ERR("Failed to rotate stream data file");
179 goto end;
180 }
181 }
182 stream->tracefile_size_current = 0;
183 stream->pos_after_last_complete_data_index = 0;
184 stream->ongoing_rotation.value.data_rotated = true;
185
186 if (stream->ongoing_rotation.value.index_rotated) {
187 /* Rotation completed; reset its state. */
188 stream_complete_rotation(stream);
189 }
190 end:
191 return ret;
192 }
193
194 /*
195 * If too much data has been written in a tracefile before we received the
196 * rotation command, we have to move the excess data to the new tracefile and
197 * perform the rotation. This can happen because the control and data
198 * connections are separate, the indexes as well as the commands arrive from
199 * the control connection and we have no control over the order so we could be
200 * in a situation where too much data has been received on the data connection
201 * before the rotation command on the control connection arrives.
202 */
203 static int rotate_truncate_stream(struct relay_stream *stream)
204 {
205 int ret;
206 off_t lseek_ret, previous_stream_copy_origin;
207 uint64_t copy_bytes_left, misplaced_data_size;
208 bool acquired_reference;
209 struct stream_fd *previous_stream_fd = NULL;
210 struct lttng_trace_chunk *previous_chunk = NULL;
211
212 if (!LTTNG_OPTIONAL_GET(stream->ongoing_rotation).next_trace_chunk) {
213 ERR("Protocol error encoutered in %s(): stream rotation "
214 "sequence number is before the current sequence number "
215 "and the next trace chunk is unset. Honoring this "
216 "rotation command would result in data loss",
217 __FUNCTION__);
218 ret = -1;
219 goto end;
220 }
221
222 ASSERT_LOCKED(stream->lock);
223 /*
224 * Acquire a reference to the current trace chunk to ensure
225 * it is not reclaimed when `stream_rotate_data_file` is called.
226 * Failing to do so would violate the contract of the trace
227 * chunk API as an active file descriptor would outlive the
228 * trace chunk.
229 */
230 acquired_reference = lttng_trace_chunk_get(stream->trace_chunk);
231 assert(acquired_reference);
232 previous_chunk = stream->trace_chunk;
233
234 /*
235 * Steal the stream's reference to its stream_fd. A new
236 * stream_fd will be created when the rotation completes and
237 * the orinal stream_fd will be used to copy the "extra" data
238 * to the new file.
239 */
240 assert(stream->stream_fd);
241 previous_stream_fd = stream->stream_fd;
242 stream->stream_fd = NULL;
243
244 assert(!stream->is_metadata);
245 assert(stream->tracefile_size_current >
246 stream->pos_after_last_complete_data_index);
247 misplaced_data_size = stream->tracefile_size_current -
248 stream->pos_after_last_complete_data_index;
249 copy_bytes_left = misplaced_data_size;
250 previous_stream_copy_origin = stream->pos_after_last_complete_data_index;
251
252 ret = stream_rotate_data_file(stream);
253 if (ret) {
254 goto end;
255 }
256
257 assert(stream->stream_fd);
258 /*
259 * Seek the current tracefile to the position at which the rotation
260 * should have occurred.
261 */
262 lseek_ret = lseek(previous_stream_fd->fd, previous_stream_copy_origin,
263 SEEK_SET);
264 if (lseek_ret < 0) {
265 PERROR("Failed to seek to offset %" PRIu64
266 " while copying extra data received before a stream rotation",
267 (uint64_t) previous_stream_copy_origin);
268 ret = -1;
269 goto end;
270 }
271
272 /* Move data from the old file to the new file. */
273 while (copy_bytes_left) {
274 ssize_t io_ret;
275 char copy_buffer[FILE_IO_STACK_BUFFER_SIZE];
276 const off_t copy_size_this_pass = min_t(
277 off_t, copy_bytes_left, sizeof(copy_buffer));
278
279 io_ret = lttng_read(previous_stream_fd->fd, copy_buffer,
280 copy_size_this_pass);
281 if (io_ret < (ssize_t) copy_size_this_pass) {
282 if (io_ret == -1) {
283 PERROR("Failed to read %" PRIu64
284 " bytes from fd %i in %s(), returned %zi",
285 copy_size_this_pass,
286 previous_stream_fd->fd,
287 __FUNCTION__, io_ret);
288 } else {
289 ERR("Failed to read %" PRIu64
290 " bytes from fd %i in %s(), returned %zi",
291 copy_size_this_pass,
292 previous_stream_fd->fd,
293 __FUNCTION__, io_ret);
294 }
295 ret = -1;
296 goto end;
297 }
298
299 io_ret = lttng_write(stream->stream_fd->fd, copy_buffer,
300 copy_size_this_pass);
301 if (io_ret < (ssize_t) copy_size_this_pass) {
302 if (io_ret == -1) {
303 PERROR("Failed to write %" PRIu64
304 " bytes from fd %i in %s(), returned %zi",
305 copy_size_this_pass,
306 stream->stream_fd->fd,
307 __FUNCTION__, io_ret);
308 } else {
309 ERR("Failed to write %" PRIu64
310 " bytes from fd %i in %s(), returned %zi",
311 copy_size_this_pass,
312 stream->stream_fd->fd,
313 __FUNCTION__, io_ret);
314 }
315 ret = -1;
316 goto end;
317 }
318 copy_bytes_left -= copy_size_this_pass;
319 }
320
321 /* Truncate the file to get rid of the excess data. */
322 ret = ftruncate(previous_stream_fd->fd, previous_stream_copy_origin);
323 if (ret) {
324 PERROR("Failed to truncate current stream file to offset %" PRIu64,
325 previous_stream_copy_origin);
326 goto end;
327 }
328
329 /*
330 * Update the offset and FD of all the eventual indexes created by the
331 * data connection before the rotation command arrived.
332 */
333 ret = relay_index_switch_all_files(stream);
334 if (ret < 0) {
335 ERR("Failed to rotate index file");
336 goto end;
337 }
338
339 stream->tracefile_size_current = misplaced_data_size;
340 /* Index and data contents are back in sync. */
341 stream->pos_after_last_complete_data_index = 0;
342 ret = 0;
343 end:
344 lttng_trace_chunk_put(previous_chunk);
345 stream_fd_put(previous_stream_fd);
346 return ret;
347 }
348
349 /*
350 * Check if a stream's data file (as opposed to index) should be rotated
351 * (for session rotation).
352 * Must be called with the stream lock held.
353 *
354 * Return 0 on success, a negative value on error.
355 */
356 static int try_rotate_stream_data(struct relay_stream *stream)
357 {
358 int ret = 0;
359
360 if (caa_likely(!stream->ongoing_rotation.is_set)) {
361 /* No rotation expected. */
362 goto end;
363 }
364
365 if (stream->ongoing_rotation.value.data_rotated) {
366 /* Rotation of the data file has already occurred. */
367 goto end;
368 }
369
370 if (stream->prev_data_seq == -1ULL ||
371 stream->ongoing_rotation.value.prev_data_net_seq == -1ULL ||
372 stream->prev_data_seq <
373 stream->ongoing_rotation.value.prev_data_net_seq) {
374 /*
375 * The next packet that will be written is not part of the next
376 * chunk yet.
377 */
378 DBG("Stream %" PRIu64 " data not yet ready for rotation "
379 "(rotate_at_index_packet_seq_num = %" PRIu64
380 ", rotate_at_prev_data_net_seq = %" PRIu64
381 ", prev_data_seq = %" PRIu64 ")",
382 stream->stream_handle,
383 stream->ongoing_rotation.value.packet_seq_num,
384 stream->ongoing_rotation.value.prev_data_net_seq,
385 stream->prev_data_seq);
386 goto end;
387 } else if (stream->prev_data_seq > stream->ongoing_rotation.value.prev_data_net_seq) {
388 /*
389 * prev_data_seq is checked here since indexes and rotation
390 * commands are serialized with respect to each other.
391 */
392 DBG("Rotation after too much data has been written in tracefile "
393 "for stream %" PRIu64 ", need to truncate before "
394 "rotating", stream->stream_handle);
395 ret = rotate_truncate_stream(stream);
396 if (ret) {
397 ERR("Failed to truncate stream");
398 goto end;
399 }
400 } else {
401 ret = stream_rotate_data_file(stream);
402 }
403
404 end:
405 return ret;
406 }
407
408 /*
409 * Close the current index file if it is open, and create a new one.
410 *
411 * Return 0 on success, -1 on error.
412 */
413 static int create_index_file(struct relay_stream *stream,
414 struct lttng_trace_chunk *chunk)
415 {
416 int ret;
417 uint32_t major, minor;
418 char *index_subpath = NULL;
419 enum lttng_trace_chunk_status status;
420
421 ASSERT_LOCKED(stream->lock);
422
423 /* Put ref on previous index_file. */
424 if (stream->index_file) {
425 lttng_index_file_put(stream->index_file);
426 stream->index_file = NULL;
427 }
428 major = stream->trace->session->major;
429 minor = stream->trace->session->minor;
430
431 if (!chunk) {
432 ret = 0;
433 goto end;
434 }
435 ret = asprintf(&index_subpath, "%s/%s", stream->path_name,
436 DEFAULT_INDEX_DIR);
437 if (ret < 0) {
438 goto end;
439 }
440
441 status = lttng_trace_chunk_create_subdirectory(chunk,
442 index_subpath);
443 free(index_subpath);
444 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
445 ret = -1;
446 goto end;
447 }
448 stream->index_file = lttng_index_file_create_from_trace_chunk(
449 chunk, stream->path_name,
450 stream->channel_name, stream->tracefile_size,
451 stream->tracefile_current_index,
452 lttng_to_index_major(major, minor),
453 lttng_to_index_minor(major, minor), true);
454 if (!stream->index_file) {
455 ret = -1;
456 goto end;
457 }
458
459 ret = 0;
460
461 end:
462 return ret;
463 }
464
465 /*
466 * Check if a stream's index file should be rotated (for session rotation).
467 * Must be called with the stream lock held.
468 *
469 * Return 0 on success, a negative value on error.
470 */
471 static int try_rotate_stream_index(struct relay_stream *stream)
472 {
473 int ret = 0;
474
475 if (!stream->ongoing_rotation.is_set) {
476 /* No rotation expected. */
477 goto end;
478 }
479
480 if (stream->ongoing_rotation.value.index_rotated) {
481 /* Rotation of the index has already occurred. */
482 goto end;
483 }
484
485 if (!stream->received_packet_seq_num.is_set ||
486 LTTNG_OPTIONAL_GET(stream->received_packet_seq_num) + 1 <
487 stream->ongoing_rotation.value.packet_seq_num) {
488 DBG("Stream %" PRIu64 " index not yet ready for rotation "
489 "(rotate_at_packet_seq_num = %" PRIu64
490 ", received_packet_seq_num = "
491 "(value = %" PRIu64 ", is_set = %" PRIu8 "))",
492 stream->stream_handle,
493 stream->ongoing_rotation.value.packet_seq_num,
494 stream->received_packet_seq_num.value,
495 stream->received_packet_seq_num.is_set);
496 goto end;
497 } else {
498 /*
499 * The next index belongs to the new trace chunk; rotate.
500 * In overwrite mode, the packet seq num may jump over the
501 * rotation position.
502 */
503 assert(LTTNG_OPTIONAL_GET(stream->received_packet_seq_num) + 1 >=
504 stream->ongoing_rotation.value.packet_seq_num);
505 DBG("Rotating stream %" PRIu64 " index file",
506 stream->stream_handle);
507 ret = create_index_file(stream,
508 stream->ongoing_rotation.value.next_trace_chunk);
509 stream->ongoing_rotation.value.index_rotated = true;
510
511 /*
512 * Set the rotation pivot position for the data, now that we have the
513 * net_seq_num matching the packet_seq_num index pivot position.
514 */
515 stream->ongoing_rotation.value.prev_data_net_seq =
516 stream->prev_index_seq;
517 if (stream->ongoing_rotation.value.data_rotated &&
518 stream->ongoing_rotation.value.index_rotated) {
519 /* Rotation completed; reset its state. */
520 DBG("Rotation completed for stream %" PRIu64,
521 stream->stream_handle);
522 stream_complete_rotation(stream);
523 }
524 }
525
526 end:
527 return ret;
528 }
529
530 static int stream_set_trace_chunk(struct relay_stream *stream,
531 struct lttng_trace_chunk *chunk)
532 {
533 int ret = 0;
534 enum lttng_trace_chunk_status status;
535 bool acquired_reference;
536 struct stream_fd *new_stream_fd = NULL;
537
538 status = lttng_trace_chunk_create_subdirectory(chunk,
539 stream->path_name);
540 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
541 ret = -1;
542 goto end;
543 }
544
545 lttng_trace_chunk_put(stream->trace_chunk);
546 acquired_reference = lttng_trace_chunk_get(chunk);
547 assert(acquired_reference);
548 stream->trace_chunk = chunk;
549
550 if (stream->stream_fd) {
551 stream_fd_put(stream->stream_fd);
552 stream->stream_fd = NULL;
553 }
554 ret = stream_create_data_output_file_from_trace_chunk(stream, chunk,
555 false, &new_stream_fd);
556 stream->stream_fd = new_stream_fd;
557 end:
558 return ret;
559 }
560
561 /*
562 * We keep ownership of path_name and channel_name.
563 */
564 struct relay_stream *stream_create(struct ctf_trace *trace,
565 uint64_t stream_handle, char *path_name,
566 char *channel_name, uint64_t tracefile_size,
567 uint64_t tracefile_count)
568 {
569 int ret;
570 struct relay_stream *stream = NULL;
571 struct relay_session *session = trace->session;
572 bool acquired_reference = false;
573 struct lttng_trace_chunk *current_trace_chunk;
574
575 stream = zmalloc(sizeof(struct relay_stream));
576 if (stream == NULL) {
577 PERROR("relay stream zmalloc");
578 goto error_no_alloc;
579 }
580
581 stream->stream_handle = stream_handle;
582 stream->prev_data_seq = -1ULL;
583 stream->prev_index_seq = -1ULL;
584 stream->last_net_seq_num = -1ULL;
585 stream->ctf_stream_id = -1ULL;
586 stream->tracefile_size = tracefile_size;
587 stream->tracefile_count = tracefile_count;
588 stream->path_name = path_name;
589 stream->channel_name = channel_name;
590 stream->beacon_ts_end = -1ULL;
591 lttng_ht_node_init_u64(&stream->node, stream->stream_handle);
592 pthread_mutex_init(&stream->lock, NULL);
593 urcu_ref_init(&stream->ref);
594 ctf_trace_get(trace);
595 stream->trace = trace;
596
597 pthread_mutex_lock(&trace->session->lock);
598 current_trace_chunk = trace->session->current_trace_chunk;
599 if (current_trace_chunk) {
600 acquired_reference = lttng_trace_chunk_get(current_trace_chunk);
601 }
602 pthread_mutex_unlock(&trace->session->lock);
603 if (!acquired_reference) {
604 ERR("Cannot create stream for channel \"%s\" as a reference to the session's current trace chunk could not be acquired",
605 channel_name);
606 ret = -1;
607 goto end;
608 }
609
610 stream->indexes_ht = lttng_ht_new(0, LTTNG_HT_TYPE_U64);
611 if (!stream->indexes_ht) {
612 ERR("Cannot created indexes_ht");
613 ret = -1;
614 goto end;
615 }
616
617 pthread_mutex_lock(&stream->lock);
618 ret = stream_set_trace_chunk(stream, current_trace_chunk);
619 pthread_mutex_unlock(&stream->lock);
620 if (ret) {
621 ERR("Failed to set the current trace chunk of session \"%s\" on newly created stream of channel \"%s\"",
622 trace->session->session_name,
623 stream->channel_name);
624 ret = -1;
625 goto end;
626 }
627 stream->tfa = tracefile_array_create(stream->tracefile_count);
628 if (!stream->tfa) {
629 ret = -1;
630 goto end;
631 }
632
633 stream->is_metadata = !strcmp(stream->channel_name,
634 DEFAULT_METADATA_NAME);
635 stream->in_recv_list = true;
636
637 /*
638 * Add the stream in the recv list of the session. Once the end stream
639 * message is received, all session streams are published.
640 */
641 pthread_mutex_lock(&session->recv_list_lock);
642 cds_list_add_rcu(&stream->recv_node, &session->recv_list);
643 session->stream_count++;
644 pthread_mutex_unlock(&session->recv_list_lock);
645
646 /*
647 * Both in the ctf_trace object and the global stream ht since the data
648 * side of the relayd does not have the concept of session.
649 */
650 lttng_ht_add_unique_u64(relay_streams_ht, &stream->node);
651 stream->in_stream_ht = true;
652
653 DBG("Relay new stream added %s with ID %" PRIu64, stream->channel_name,
654 stream->stream_handle);
655 ret = 0;
656
657 end:
658 if (ret) {
659 if (stream->stream_fd) {
660 stream_fd_put(stream->stream_fd);
661 stream->stream_fd = NULL;
662 }
663 stream_put(stream);
664 stream = NULL;
665 }
666 if (acquired_reference) {
667 lttng_trace_chunk_put(current_trace_chunk);
668 }
669 return stream;
670
671 error_no_alloc:
672 /*
673 * path_name and channel_name need to be freed explicitly here
674 * because we cannot rely on stream_put().
675 */
676 free(path_name);
677 free(channel_name);
678 return NULL;
679 }
680
681 /*
682 * Called with the session lock held.
683 */
684 void stream_publish(struct relay_stream *stream)
685 {
686 struct relay_session *session;
687
688 pthread_mutex_lock(&stream->lock);
689 if (stream->published) {
690 goto unlock;
691 }
692
693 session = stream->trace->session;
694
695 pthread_mutex_lock(&session->recv_list_lock);
696 if (stream->in_recv_list) {
697 cds_list_del_rcu(&stream->recv_node);
698 stream->in_recv_list = false;
699 }
700 pthread_mutex_unlock(&session->recv_list_lock);
701
702 pthread_mutex_lock(&stream->trace->stream_list_lock);
703 cds_list_add_rcu(&stream->stream_node, &stream->trace->stream_list);
704 pthread_mutex_unlock(&stream->trace->stream_list_lock);
705
706 stream->published = true;
707 unlock:
708 pthread_mutex_unlock(&stream->lock);
709 }
710
711 /*
712 * Stream must be protected by holding the stream lock or by virtue of being
713 * called from stream_destroy.
714 */
715 static void stream_unpublish(struct relay_stream *stream)
716 {
717 if (stream->in_stream_ht) {
718 struct lttng_ht_iter iter;
719 int ret;
720
721 iter.iter.node = &stream->node.node;
722 ret = lttng_ht_del(relay_streams_ht, &iter);
723 assert(!ret);
724 stream->in_stream_ht = false;
725 }
726 if (stream->published) {
727 pthread_mutex_lock(&stream->trace->stream_list_lock);
728 cds_list_del_rcu(&stream->stream_node);
729 pthread_mutex_unlock(&stream->trace->stream_list_lock);
730 stream->published = false;
731 }
732 }
733
734 static void stream_destroy(struct relay_stream *stream)
735 {
736 if (stream->indexes_ht) {
737 /*
738 * Calling lttng_ht_destroy in call_rcu worker thread so
739 * we don't hold the RCU read-side lock while calling
740 * it.
741 */
742 lttng_ht_destroy(stream->indexes_ht);
743 }
744 if (stream->tfa) {
745 tracefile_array_destroy(stream->tfa);
746 }
747 free(stream->path_name);
748 free(stream->channel_name);
749 free(stream);
750 }
751
752 static void stream_destroy_rcu(struct rcu_head *rcu_head)
753 {
754 struct relay_stream *stream =
755 caa_container_of(rcu_head, struct relay_stream, rcu_node);
756
757 stream_destroy(stream);
758 }
759
760 /*
761 * No need to take stream->lock since this is only called on the final
762 * stream_put which ensures that a single thread may act on the stream.
763 */
764 static void stream_release(struct urcu_ref *ref)
765 {
766 struct relay_stream *stream =
767 caa_container_of(ref, struct relay_stream, ref);
768 struct relay_session *session;
769
770 session = stream->trace->session;
771
772 DBG("Releasing stream id %" PRIu64, stream->stream_handle);
773
774 pthread_mutex_lock(&session->recv_list_lock);
775 session->stream_count--;
776 if (stream->in_recv_list) {
777 cds_list_del_rcu(&stream->recv_node);
778 stream->in_recv_list = false;
779 }
780 pthread_mutex_unlock(&session->recv_list_lock);
781
782 stream_unpublish(stream);
783
784 if (stream->stream_fd) {
785 stream_fd_put(stream->stream_fd);
786 stream->stream_fd = NULL;
787 }
788 if (stream->index_file) {
789 lttng_index_file_put(stream->index_file);
790 stream->index_file = NULL;
791 }
792 if (stream->trace) {
793 ctf_trace_put(stream->trace);
794 stream->trace = NULL;
795 }
796 stream_complete_rotation(stream);
797 lttng_trace_chunk_put(stream->trace_chunk);
798 stream->trace_chunk = NULL;
799
800 call_rcu(&stream->rcu_node, stream_destroy_rcu);
801 }
802
803 void stream_put(struct relay_stream *stream)
804 {
805 rcu_read_lock();
806 assert(stream->ref.refcount != 0);
807 /*
808 * Wait until we have processed all the stream packets before
809 * actually putting our last stream reference.
810 */
811 urcu_ref_put(&stream->ref, stream_release);
812 rcu_read_unlock();
813 }
814
815 int stream_set_pending_rotation(struct relay_stream *stream,
816 struct lttng_trace_chunk *next_trace_chunk,
817 uint64_t rotation_sequence_number)
818 {
819 int ret = 0;
820 const struct relay_stream_rotation rotation = {
821 .data_rotated = false,
822 .index_rotated = false,
823 .packet_seq_num = rotation_sequence_number,
824 .prev_data_net_seq = -1ULL,
825 .next_trace_chunk = next_trace_chunk,
826 };
827
828 if (stream->ongoing_rotation.is_set) {
829 ERR("Attempted to set a pending rotation on a stream already being rotated (protocol error)");
830 ret = -1;
831 goto end;
832 }
833
834 if (next_trace_chunk) {
835 const bool reference_acquired =
836 lttng_trace_chunk_get(next_trace_chunk);
837
838 assert(reference_acquired);
839 }
840 LTTNG_OPTIONAL_SET(&stream->ongoing_rotation, rotation);
841
842 DBG("Setting pending rotation: stream_id = %" PRIu64
843 ", rotate_at_packet_seq_num = %" PRIu64,
844 stream->stream_handle, rotation_sequence_number);
845 if (stream->is_metadata) {
846 /*
847 * A metadata stream has no index; consider it already rotated.
848 */
849 stream->ongoing_rotation.value.index_rotated = true;
850 ret = stream_rotate_data_file(stream);
851 } else {
852 ret = try_rotate_stream_index(stream);
853 if (ret < 0) {
854 goto end;
855 }
856
857 ret = try_rotate_stream_data(stream);
858 if (ret < 0) {
859 goto end;
860 }
861 }
862 end:
863 return ret;
864 }
865
866 void try_stream_close(struct relay_stream *stream)
867 {
868 bool session_aborted;
869 struct relay_session *session = stream->trace->session;
870
871 DBG("Trying to close stream %" PRIu64, stream->stream_handle);
872
873 pthread_mutex_lock(&session->lock);
874 session_aborted = session->aborted;
875 pthread_mutex_unlock(&session->lock);
876
877 pthread_mutex_lock(&stream->lock);
878 /*
879 * Can be called concurently by connection close and reception of last
880 * pending data.
881 */
882 if (stream->closed) {
883 pthread_mutex_unlock(&stream->lock);
884 DBG("closing stream %" PRIu64 " aborted since it is already marked as closed", stream->stream_handle);
885 return;
886 }
887
888 stream->close_requested = true;
889
890 if (stream->last_net_seq_num == -1ULL) {
891 /*
892 * Handle connection close without explicit stream close
893 * command.
894 *
895 * We can be clever about indexes partially received in
896 * cases where we received the data socket part, but not
897 * the control socket part: since we're currently closing
898 * the stream on behalf of the control socket, we *know*
899 * there won't be any more control information for this
900 * socket. Therefore, we can destroy all indexes for
901 * which we have received only the file descriptor (from
902 * data socket). This takes care of consumerd crashes
903 * between sending the data and control information for
904 * a packet. Since those are sent in that order, we take
905 * care of consumerd crashes.
906 */
907 DBG("relay_index_close_partial_fd");
908 relay_index_close_partial_fd(stream);
909 /*
910 * Use the highest net_seq_num we currently have pending
911 * As end of stream indicator. Leave last_net_seq_num
912 * at -1ULL if we cannot find any index.
913 */
914 stream->last_net_seq_num = relay_index_find_last(stream);
915 DBG("Updating stream->last_net_seq_num to %" PRIu64, stream->last_net_seq_num);
916 /* Fall-through into the next check. */
917 }
918
919 if (stream->last_net_seq_num != -1ULL &&
920 ((int64_t) (stream->prev_data_seq - stream->last_net_seq_num)) < 0
921 && !session_aborted) {
922 /*
923 * Don't close since we still have data pending. This
924 * handles cases where an explicit close command has
925 * been received for this stream, and cases where the
926 * connection has been closed, and we are awaiting for
927 * index information from the data socket. It is
928 * therefore expected that all the index fd information
929 * we need has already been received on the control
930 * socket. Matching index information from data socket
931 * should be Expected Soon(TM).
932 *
933 * TODO: We should implement a timer to garbage collect
934 * streams after a timeout to be resilient against a
935 * consumerd implementation that would not match this
936 * expected behavior.
937 */
938 pthread_mutex_unlock(&stream->lock);
939 DBG("closing stream %" PRIu64 " aborted since it still has data pending", stream->stream_handle);
940 return;
941 }
942 /*
943 * We received all the indexes we can expect.
944 */
945 stream_unpublish(stream);
946 stream->closed = true;
947 /* Relay indexes are only used by the "consumer/sessiond" end. */
948 relay_index_close_all(stream);
949
950 /*
951 * If we are closed by an application exiting (per-pid buffers),
952 * we need to put our reference on the stream trace chunk right
953 * away, because otherwise still holding the reference on the
954 * trace chunk could allow a viewer stream (which holds a reference
955 * to the stream) to postpone destroy waiting for the chunk to cease
956 * to exist endlessly until the viewer is detached.
957 */
958
959 /* Put stream fd before put chunk. */
960 if (stream->stream_fd) {
961 stream_fd_put(stream->stream_fd);
962 stream->stream_fd = NULL;
963 }
964 if (stream->index_file) {
965 lttng_index_file_put(stream->index_file);
966 stream->index_file = NULL;
967 }
968 lttng_trace_chunk_put(stream->trace_chunk);
969 stream->trace_chunk = NULL;
970 pthread_mutex_unlock(&stream->lock);
971 DBG("Succeeded in closing stream %" PRIu64, stream->stream_handle);
972 stream_put(stream);
973 }
974
975 int stream_init_packet(struct relay_stream *stream, size_t packet_size,
976 bool *file_rotated)
977 {
978 int ret = 0;
979
980 ASSERT_LOCKED(stream->lock);
981
982 if (!stream->stream_fd || !stream->trace_chunk) {
983 ERR("Protocol error: received a packet for a stream that doesn't have a current trace chunk: stream_id = %" PRIu64 ", channel_name = %s",
984 stream->stream_handle, stream->channel_name);
985 ret = -1;
986 goto end;
987 }
988
989 if (caa_likely(stream->tracefile_size == 0)) {
990 /* No size limit set; nothing to check. */
991 goto end;
992 }
993
994 /*
995 * Check if writing the new packet would exceed the maximal file size.
996 */
997 if (caa_unlikely((stream->tracefile_size_current + packet_size) >
998 stream->tracefile_size)) {
999 const uint64_t new_file_index =
1000 (stream->tracefile_current_index + 1) %
1001 stream->tracefile_count;
1002
1003 if (new_file_index < stream->tracefile_current_index) {
1004 stream->tracefile_wrapped_around = true;
1005 }
1006 DBG("New stream packet causes stream file rotation: stream_id = %" PRIu64
1007 ", current_file_size = %" PRIu64
1008 ", packet_size = %zu, current_file_index = %" PRIu64
1009 " new_file_index = %" PRIu64,
1010 stream->stream_handle,
1011 stream->tracefile_size_current, packet_size,
1012 stream->tracefile_current_index, new_file_index);
1013 tracefile_array_file_rotate(stream->tfa, TRACEFILE_ROTATE_WRITE);
1014 stream->tracefile_current_index = new_file_index;
1015
1016 if (stream->stream_fd) {
1017 stream_fd_put(stream->stream_fd);
1018 stream->stream_fd = NULL;
1019 }
1020 ret = stream_create_data_output_file_from_trace_chunk(stream,
1021 stream->trace_chunk, false, &stream->stream_fd);
1022 if (ret) {
1023 ERR("Failed to perform trace file rotation of stream %" PRIu64,
1024 stream->stream_handle);
1025 goto end;
1026 }
1027
1028 /*
1029 * Reset current size because we just performed a stream
1030 * rotation.
1031 */
1032 stream->tracefile_size_current = 0;
1033 *file_rotated = true;
1034 } else {
1035 *file_rotated = false;
1036 }
1037 end:
1038 return ret;
1039 }
1040
1041 /* Note that the packet is not necessarily complete. */
1042 int stream_write(struct relay_stream *stream,
1043 const struct lttng_buffer_view *packet, size_t padding_len)
1044 {
1045 int ret = 0;
1046 ssize_t write_ret;
1047 size_t padding_to_write = padding_len;
1048 char padding_buffer[FILE_IO_STACK_BUFFER_SIZE];
1049
1050 ASSERT_LOCKED(stream->lock);
1051 memset(padding_buffer, 0,
1052 min(sizeof(padding_buffer), padding_to_write));
1053
1054 if (!stream->stream_fd || !stream->trace_chunk) {
1055 ERR("Protocol error: received a packet for a stream that doesn't have a current trace chunk: stream_id = %" PRIu64 ", channel_name = %s",
1056 stream->stream_handle, stream->channel_name);
1057 ret = -1;
1058 goto end;
1059 }
1060 if (packet) {
1061 write_ret = lttng_write(stream->stream_fd->fd,
1062 packet->data, packet->size);
1063 if (write_ret != packet->size) {
1064 PERROR("Failed to write to stream file of %sstream %" PRIu64,
1065 stream->is_metadata ? "metadata " : "",
1066 stream->stream_handle);
1067 ret = -1;
1068 goto end;
1069 }
1070 }
1071
1072 while (padding_to_write > 0) {
1073 const size_t padding_to_write_this_pass =
1074 min(padding_to_write, sizeof(padding_buffer));
1075
1076 write_ret = lttng_write(stream->stream_fd->fd,
1077 padding_buffer, padding_to_write_this_pass);
1078 if (write_ret != padding_to_write_this_pass) {
1079 PERROR("Failed to write padding to file of %sstream %" PRIu64,
1080 stream->is_metadata ? "metadata " : "",
1081 stream->stream_handle);
1082 ret = -1;
1083 goto end;
1084 }
1085 padding_to_write -= padding_to_write_this_pass;
1086 }
1087
1088 if (stream->is_metadata) {
1089 stream->metadata_received += packet ? packet->size : 0;
1090 stream->metadata_received += padding_len;
1091 }
1092
1093 DBG("Wrote to %sstream %" PRIu64 ": data_length = %zu, padding_length = %zu",
1094 stream->is_metadata ? "metadata " : "",
1095 stream->stream_handle,
1096 packet ? packet->size : (size_t) 0, padding_len);
1097 end:
1098 return ret;
1099 }
1100
1101 /*
1102 * Update index after receiving a packet for a data stream.
1103 *
1104 * Called with the stream lock held.
1105 *
1106 * Return 0 on success else a negative value.
1107 */
1108 int stream_update_index(struct relay_stream *stream, uint64_t net_seq_num,
1109 bool rotate_index, bool *flushed, uint64_t total_size)
1110 {
1111 int ret = 0;
1112 uint64_t data_offset;
1113 struct relay_index *index;
1114
1115 assert(stream->trace_chunk);
1116 ASSERT_LOCKED(stream->lock);
1117 /* Get data offset because we are about to update the index. */
1118 data_offset = htobe64(stream->tracefile_size_current);
1119
1120 DBG("handle_index_data: stream %" PRIu64 " net_seq_num %" PRIu64 " data offset %" PRIu64,
1121 stream->stream_handle, net_seq_num, stream->tracefile_size_current);
1122
1123 /*
1124 * Lookup for an existing index for that stream id/sequence
1125 * number. If it exists, the control thread has already received the
1126 * data for it, thus we need to write it to disk.
1127 */
1128 index = relay_index_get_by_id_or_create(stream, net_seq_num);
1129 if (!index) {
1130 ret = -1;
1131 goto end;
1132 }
1133
1134 if (rotate_index || !stream->index_file) {
1135 ret = create_index_file(stream, stream->trace_chunk);
1136 if (ret) {
1137 ERR("Failed to create index file for stream %" PRIu64,
1138 stream->stream_handle);
1139 /* Put self-ref for this index due to error. */
1140 relay_index_put(index);
1141 index = NULL;
1142 goto end;
1143 }
1144 }
1145
1146 if (relay_index_set_file(index, stream->index_file, data_offset)) {
1147 ret = -1;
1148 /* Put self-ref for this index due to error. */
1149 relay_index_put(index);
1150 index = NULL;
1151 goto end;
1152 }
1153
1154 ret = relay_index_try_flush(index);
1155 if (ret == 0) {
1156 tracefile_array_file_rotate(stream->tfa, TRACEFILE_ROTATE_READ);
1157 tracefile_array_commit_seq(stream->tfa);
1158 stream->index_received_seqcount++;
1159 LTTNG_OPTIONAL_SET(&stream->received_packet_seq_num,
1160 be64toh(index->index_data.packet_seq_num));
1161 *flushed = true;
1162 } else if (ret > 0) {
1163 index->total_size = total_size;
1164 /* No flush. */
1165 ret = 0;
1166 } else {
1167 /*
1168 * ret < 0
1169 *
1170 * relay_index_try_flush is responsible for the self-reference
1171 * put of the index object on error.
1172 */
1173 ERR("relay_index_try_flush error %d", ret);
1174 ret = -1;
1175 }
1176 end:
1177 return ret;
1178 }
1179
1180 int stream_complete_packet(struct relay_stream *stream, size_t packet_total_size,
1181 uint64_t sequence_number, bool index_flushed)
1182 {
1183 int ret = 0;
1184
1185 ASSERT_LOCKED(stream->lock);
1186
1187 stream->tracefile_size_current += packet_total_size;
1188 if (index_flushed) {
1189 stream->pos_after_last_complete_data_index =
1190 stream->tracefile_size_current;
1191 stream->prev_index_seq = sequence_number;
1192 ret = try_rotate_stream_index(stream);
1193 if (ret < 0) {
1194 goto end;
1195 }
1196 }
1197
1198 stream->prev_data_seq = sequence_number;
1199 ret = try_rotate_stream_data(stream);
1200
1201 end:
1202 return ret;
1203 }
1204
1205 int stream_add_index(struct relay_stream *stream,
1206 const struct lttcomm_relayd_index *index_info)
1207 {
1208 int ret = 0;
1209 struct relay_index *index;
1210
1211 ASSERT_LOCKED(stream->lock);
1212
1213 /* Live beacon handling */
1214 if (index_info->packet_size == 0) {
1215 DBG("Received live beacon for stream %" PRIu64,
1216 stream->stream_handle);
1217
1218 /*
1219 * Only flag a stream inactive when it has already
1220 * received data and no indexes are in flight.
1221 */
1222 if (stream->index_received_seqcount > 0
1223 && stream->indexes_in_flight == 0) {
1224 stream->beacon_ts_end = index_info->timestamp_end;
1225 }
1226 ret = 0;
1227 goto end;
1228 } else {
1229 stream->beacon_ts_end = -1ULL;
1230 }
1231
1232 if (stream->ctf_stream_id == -1ULL) {
1233 stream->ctf_stream_id = index_info->stream_id;
1234 }
1235
1236 index = relay_index_get_by_id_or_create(stream, index_info->net_seq_num);
1237 if (!index) {
1238 ret = -1;
1239 ERR("Failed to get or create index %" PRIu64,
1240 index_info->net_seq_num);
1241 goto end;
1242 }
1243 if (relay_index_set_control_data(index, index_info,
1244 stream->trace->session->minor)) {
1245 ERR("set_index_control_data error");
1246 relay_index_put(index);
1247 ret = -1;
1248 goto end;
1249 }
1250 ret = relay_index_try_flush(index);
1251 if (ret == 0) {
1252 tracefile_array_file_rotate(stream->tfa, TRACEFILE_ROTATE_READ);
1253 tracefile_array_commit_seq(stream->tfa);
1254 stream->index_received_seqcount++;
1255 stream->pos_after_last_complete_data_index += index->total_size;
1256 stream->prev_index_seq = index_info->net_seq_num;
1257 LTTNG_OPTIONAL_SET(&stream->received_packet_seq_num,
1258 index_info->packet_seq_num);
1259
1260 ret = try_rotate_stream_index(stream);
1261 if (ret < 0) {
1262 goto end;
1263 }
1264 ret = try_rotate_stream_data(stream);
1265 if (ret < 0) {
1266 goto end;
1267 }
1268 } else if (ret > 0) {
1269 /* no flush. */
1270 ret = 0;
1271 } else {
1272 /*
1273 * ret < 0
1274 *
1275 * relay_index_try_flush is responsible for the self-reference
1276 * put of the index object on error.
1277 */
1278 ERR("relay_index_try_flush error %d", ret);
1279 ret = -1;
1280 }
1281 end:
1282 return ret;
1283 }
1284
1285 static void print_stream_indexes(struct relay_stream *stream)
1286 {
1287 struct lttng_ht_iter iter;
1288 struct relay_index *index;
1289
1290 rcu_read_lock();
1291 cds_lfht_for_each_entry(stream->indexes_ht->ht, &iter.iter, index,
1292 index_n.node) {
1293 DBG("index %p net_seq_num %" PRIu64 " refcount %ld"
1294 " stream %" PRIu64 " trace %" PRIu64
1295 " session %" PRIu64,
1296 index,
1297 index->index_n.key,
1298 stream->ref.refcount,
1299 index->stream->stream_handle,
1300 index->stream->trace->id,
1301 index->stream->trace->session->id);
1302 }
1303 rcu_read_unlock();
1304 }
1305
1306 int stream_reset_file(struct relay_stream *stream)
1307 {
1308 ASSERT_LOCKED(stream->lock);
1309
1310 if (stream->stream_fd) {
1311 stream_fd_put(stream->stream_fd);
1312 stream->stream_fd = NULL;
1313 }
1314
1315 stream->tracefile_size_current = 0;
1316 stream->prev_data_seq = 0;
1317 stream->prev_index_seq = 0;
1318 /* Note that this does not reset the tracefile array. */
1319 stream->tracefile_current_index = 0;
1320 stream->pos_after_last_complete_data_index = 0;
1321
1322 return stream_create_data_output_file_from_trace_chunk(stream,
1323 stream->trace_chunk, true, &stream->stream_fd);
1324 }
1325
1326 void print_relay_streams(void)
1327 {
1328 struct lttng_ht_iter iter;
1329 struct relay_stream *stream;
1330
1331 if (!relay_streams_ht) {
1332 return;
1333 }
1334
1335 rcu_read_lock();
1336 cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, stream,
1337 node.node) {
1338 if (!stream_get(stream)) {
1339 continue;
1340 }
1341 DBG("stream %p refcount %ld stream %" PRIu64 " trace %" PRIu64
1342 " session %" PRIu64,
1343 stream,
1344 stream->ref.refcount,
1345 stream->stream_handle,
1346 stream->trace->id,
1347 stream->trace->session->id);
1348 print_stream_indexes(stream);
1349 stream_put(stream);
1350 }
1351 rcu_read_unlock();
1352 }
This page took 0.09756 seconds and 4 git commands to generate.