b0521c7d8a1fc6c322dc70ded83b0897e5464878
[lttng-tools.git] / src / bin / lttng-relayd / stream.c
1 /*
2 * Copyright (C) 2013 Julien Desfossez <jdesfossez@efficios.com>
3 * Copyright (C) 2013 David Goulet <dgoulet@efficios.com>
4 * Copyright (C) 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 * Copyright (C) 2019 Jérémie Galarneau <jeremie.galarneau@efficios.com>
6 *
7 * SPDX-License-Identifier: GPL-2.0-only
8 *
9 */
10
11 #define _LGPL_SOURCE
12 #include <common/common.h>
13 #include <common/defaults.h>
14 #include <common/fs-handle.h>
15 #include <common/sessiond-comm/relayd.h>
16 #include <common/utils.h>
17 #include <sys/stat.h>
18 #include <urcu/rculist.h>
19
20 #include "lttng-relayd.h"
21 #include "index.h"
22 #include "stream.h"
23 #include "viewer-stream.h"
24
25 #include <sys/types.h>
26 #include <fcntl.h>
27
28 #define FILE_IO_STACK_BUFFER_SIZE 65536
29
30 /* Should be called with RCU read-side lock held. */
31 bool stream_get(struct relay_stream *stream)
32 {
33 return urcu_ref_get_unless_zero(&stream->ref);
34 }
35
36 /*
37 * Get stream from stream id from the streams hash table. Return stream
38 * if found else NULL. A stream reference is taken when a stream is
39 * returned. stream_put() must be called on that stream.
40 */
41 struct relay_stream *stream_get_by_id(uint64_t stream_id)
42 {
43 struct lttng_ht_node_u64 *node;
44 struct lttng_ht_iter iter;
45 struct relay_stream *stream = NULL;
46
47 rcu_read_lock();
48 lttng_ht_lookup(relay_streams_ht, &stream_id, &iter);
49 node = lttng_ht_iter_get_node_u64(&iter);
50 if (!node) {
51 DBG("Relay stream %" PRIu64 " not found", stream_id);
52 goto end;
53 }
54 stream = caa_container_of(node, struct relay_stream, node);
55 if (!stream_get(stream)) {
56 stream = NULL;
57 }
58 end:
59 rcu_read_unlock();
60 return stream;
61 }
62
63 static void stream_complete_rotation(struct relay_stream *stream)
64 {
65 DBG("Rotation completed for stream %" PRIu64, stream->stream_handle);
66 if (stream->ongoing_rotation.value.next_trace_chunk) {
67 tracefile_array_reset(stream->tfa);
68 tracefile_array_commit_seq(stream->tfa,
69 stream->index_received_seqcount);
70 }
71 lttng_trace_chunk_put(stream->trace_chunk);
72 stream->trace_chunk = stream->ongoing_rotation.value.next_trace_chunk;
73 stream->ongoing_rotation = (typeof(stream->ongoing_rotation)) {};
74 }
75
76 static int stream_create_data_output_file_from_trace_chunk(
77 struct relay_stream *stream,
78 struct lttng_trace_chunk *trace_chunk,
79 bool force_unlink,
80 struct fs_handle **out_file)
81 {
82 int ret;
83 char stream_path[LTTNG_PATH_MAX];
84 enum lttng_trace_chunk_status status;
85 const int flags = O_RDWR | O_CREAT | O_TRUNC;
86 const mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
87
88 ASSERT_LOCKED(stream->lock);
89
90 ret = utils_stream_file_path(stream->path_name, stream->channel_name,
91 stream->tracefile_size, stream->tracefile_current_index,
92 NULL, stream_path, sizeof(stream_path));
93 if (ret < 0) {
94 goto end;
95 }
96
97 if (stream->tracefile_wrapped_around || force_unlink) {
98 /*
99 * The on-disk ring-buffer has wrapped around.
100 * Newly created stream files will replace existing files. Since
101 * live clients may be consuming existing files, the file about
102 * to be replaced is unlinked in order to not overwrite its
103 * content.
104 */
105 status = lttng_trace_chunk_unlink_file(trace_chunk,
106 stream_path);
107 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
108 PERROR("Failed to unlink stream file \"%s\" during trace file rotation",
109 stream_path);
110 /*
111 * Don't abort if the file doesn't exist, it is
112 * unexpected, but should not be a fatal error.
113 */
114 if (errno != ENOENT) {
115 ret = -1;
116 goto end;
117 }
118 }
119 }
120
121 status = lttng_trace_chunk_open_fs_handle(trace_chunk, stream_path,
122 flags, mode, out_file, false);
123 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
124 ERR("Failed to open stream file \"%s\"", stream->channel_name);
125 ret = -1;
126 goto end;
127 }
128 end:
129 return ret;
130 }
131
132 static int stream_rotate_data_file(struct relay_stream *stream)
133 {
134 int ret = 0;
135
136 DBG("Rotating stream %" PRIu64 " data file with size %" PRIu64,
137 stream->stream_handle, stream->tracefile_size_current);
138
139 if (stream->file) {
140 fs_handle_close(stream->file);
141 stream->file = NULL;
142 }
143
144 stream->tracefile_wrapped_around = false;
145 stream->tracefile_current_index = 0;
146
147 if (stream->ongoing_rotation.value.next_trace_chunk) {
148 enum lttng_trace_chunk_status chunk_status;
149
150 chunk_status = lttng_trace_chunk_create_subdirectory(
151 stream->ongoing_rotation.value.next_trace_chunk,
152 stream->path_name);
153 if (chunk_status != LTTNG_TRACE_CHUNK_STATUS_OK) {
154 ret = -1;
155 goto end;
156 }
157
158 /* Rotate the data file. */
159 ret = stream_create_data_output_file_from_trace_chunk(stream,
160 stream->ongoing_rotation.value.next_trace_chunk,
161 false, &stream->file);
162 if (ret < 0) {
163 ERR("Failed to rotate stream data file");
164 goto end;
165 }
166 }
167 DBG("%s: reset tracefile_size_current for stream %" PRIu64 " was %" PRIu64,
168 __func__, stream->stream_handle, stream->tracefile_size_current);
169 stream->tracefile_size_current = 0;
170 stream->pos_after_last_complete_data_index = 0;
171 stream->ongoing_rotation.value.data_rotated = true;
172
173 if (stream->ongoing_rotation.value.index_rotated) {
174 /* Rotation completed; reset its state. */
175 stream_complete_rotation(stream);
176 }
177 end:
178 return ret;
179 }
180
181 /*
182 * If too much data has been written in a tracefile before we received the
183 * rotation command, we have to move the excess data to the new tracefile and
184 * perform the rotation. This can happen because the control and data
185 * connections are separate, the indexes as well as the commands arrive from
186 * the control connection and we have no control over the order so we could be
187 * in a situation where too much data has been received on the data connection
188 * before the rotation command on the control connection arrives.
189 */
190 static int rotate_truncate_stream(struct relay_stream *stream)
191 {
192 int ret;
193 off_t lseek_ret, previous_stream_copy_origin;
194 uint64_t copy_bytes_left, misplaced_data_size;
195 bool acquired_reference;
196 struct fs_handle *previous_stream_file = NULL;
197 struct lttng_trace_chunk *previous_chunk = NULL;
198
199 if (!LTTNG_OPTIONAL_GET(stream->ongoing_rotation).next_trace_chunk) {
200 ERR("Protocol error encoutered in %s(): stream rotation "
201 "sequence number is before the current sequence number "
202 "and the next trace chunk is unset. Honoring this "
203 "rotation command would result in data loss",
204 __FUNCTION__);
205 ret = -1;
206 goto end;
207 }
208
209 ASSERT_LOCKED(stream->lock);
210 /*
211 * Acquire a reference to the current trace chunk to ensure
212 * it is not reclaimed when `stream_rotate_data_file` is called.
213 * Failing to do so would violate the contract of the trace
214 * chunk API as an active file descriptor would outlive the
215 * trace chunk.
216 */
217 acquired_reference = lttng_trace_chunk_get(stream->trace_chunk);
218 assert(acquired_reference);
219 previous_chunk = stream->trace_chunk;
220
221 /*
222 * Steal the stream's reference to its stream_fd. A new
223 * stream_fd will be created when the rotation completes and
224 * the orinal stream_fd will be used to copy the "extra" data
225 * to the new file.
226 */
227 assert(stream->file);
228 previous_stream_file = stream->file;
229 stream->file = NULL;
230
231 assert(!stream->is_metadata);
232 assert(stream->tracefile_size_current >
233 stream->pos_after_last_complete_data_index);
234 misplaced_data_size = stream->tracefile_size_current -
235 stream->pos_after_last_complete_data_index;
236 copy_bytes_left = misplaced_data_size;
237 previous_stream_copy_origin = stream->pos_after_last_complete_data_index;
238
239 ret = stream_rotate_data_file(stream);
240 if (ret) {
241 goto end;
242 }
243
244 assert(stream->file);
245 /*
246 * Seek the current tracefile to the position at which the rotation
247 * should have occurred.
248 */
249 lseek_ret = fs_handle_seek(previous_stream_file, previous_stream_copy_origin, SEEK_SET);
250 if (lseek_ret < 0) {
251 PERROR("Failed to seek to offset %" PRIu64
252 " while copying extra data received before a stream rotation",
253 (uint64_t) previous_stream_copy_origin);
254 ret = -1;
255 goto end;
256 }
257
258 /* Move data from the old file to the new file. */
259 while (copy_bytes_left) {
260 ssize_t io_ret;
261 char copy_buffer[FILE_IO_STACK_BUFFER_SIZE];
262 const off_t copy_size_this_pass = min_t(
263 off_t, copy_bytes_left, sizeof(copy_buffer));
264
265 io_ret = fs_handle_read(previous_stream_file, copy_buffer,
266 copy_size_this_pass);
267 if (io_ret < (ssize_t) copy_size_this_pass) {
268 if (io_ret == -1) {
269 PERROR("Failed to read %" PRIu64
270 " bytes from previous stream file in %s(), returned %zi: stream id = %" PRIu64,
271 copy_size_this_pass,
272 __FUNCTION__, io_ret,
273 stream->stream_handle);
274 } else {
275 ERR("Failed to read %" PRIu64
276 " bytes from previous stream file in %s(), returned %zi: stream id = %" PRIu64,
277 copy_size_this_pass,
278 __FUNCTION__, io_ret,
279 stream->stream_handle);
280 }
281 ret = -1;
282 goto end;
283 }
284
285 io_ret = fs_handle_write(
286 stream->file, copy_buffer, copy_size_this_pass);
287 if (io_ret < (ssize_t) copy_size_this_pass) {
288 if (io_ret == -1) {
289 PERROR("Failed to write %" PRIu64
290 " bytes from previous stream file in %s(), returned %zi: stream id = %" PRIu64,
291 copy_size_this_pass,
292 __FUNCTION__, io_ret,
293 stream->stream_handle);
294 } else {
295 ERR("Failed to write %" PRIu64
296 " bytes from previous stream file in %s(), returned %zi: stream id = %" PRIu64,
297 copy_size_this_pass,
298 __FUNCTION__, io_ret,
299 stream->stream_handle);
300 }
301 ret = -1;
302 goto end;
303 }
304 copy_bytes_left -= copy_size_this_pass;
305 }
306
307 /* Truncate the file to get rid of the excess data. */
308 ret = fs_handle_truncate(
309 previous_stream_file, previous_stream_copy_origin);
310 if (ret) {
311 PERROR("Failed to truncate current stream file to offset %" PRIu64,
312 previous_stream_copy_origin);
313 goto end;
314 }
315
316 /*
317 * Update the offset and FD of all the eventual indexes created by the
318 * data connection before the rotation command arrived.
319 */
320 ret = relay_index_switch_all_files(stream);
321 if (ret < 0) {
322 ERR("Failed to rotate index file");
323 goto end;
324 }
325
326 stream->tracefile_size_current = misplaced_data_size;
327 /* Index and data contents are back in sync. */
328 stream->pos_after_last_complete_data_index = 0;
329 ret = 0;
330 end:
331 lttng_trace_chunk_put(previous_chunk);
332 return ret;
333 }
334
335 /*
336 * Check if a stream's data file (as opposed to index) should be rotated
337 * (for session rotation).
338 * Must be called with the stream lock held.
339 *
340 * Return 0 on success, a negative value on error.
341 */
342 static int try_rotate_stream_data(struct relay_stream *stream)
343 {
344 int ret = 0;
345
346 if (caa_likely(!stream->ongoing_rotation.is_set)) {
347 /* No rotation expected. */
348 goto end;
349 }
350
351 if (stream->ongoing_rotation.value.data_rotated) {
352 /* Rotation of the data file has already occurred. */
353 goto end;
354 }
355
356 DBG("%s: Stream %" PRIu64
357 " (rotate_at_index_packet_seq_num = %" PRIu64
358 ", rotate_at_prev_data_net_seq = %" PRIu64
359 ", prev_data_seq = %" PRIu64 ")",
360 __func__, stream->stream_handle,
361 stream->ongoing_rotation.value.packet_seq_num,
362 stream->ongoing_rotation.value.prev_data_net_seq,
363 stream->prev_data_seq);
364
365 if (stream->prev_data_seq == -1ULL ||
366 stream->ongoing_rotation.value.prev_data_net_seq == -1ULL ||
367 stream->prev_data_seq <
368 stream->ongoing_rotation.value.prev_data_net_seq) {
369 /*
370 * The next packet that will be written is not part of the next
371 * chunk yet.
372 */
373 DBG("Stream %" PRIu64 " data not yet ready for rotation "
374 "(rotate_at_index_packet_seq_num = %" PRIu64
375 ", rotate_at_prev_data_net_seq = %" PRIu64
376 ", prev_data_seq = %" PRIu64 ")",
377 stream->stream_handle,
378 stream->ongoing_rotation.value.packet_seq_num,
379 stream->ongoing_rotation.value.prev_data_net_seq,
380 stream->prev_data_seq);
381 goto end;
382 } else if (stream->prev_data_seq > stream->ongoing_rotation.value.prev_data_net_seq) {
383 /*
384 * prev_data_seq is checked here since indexes and rotation
385 * commands are serialized with respect to each other.
386 */
387 DBG("Rotation after too much data has been written in tracefile "
388 "for stream %" PRIu64 ", need to truncate before "
389 "rotating", stream->stream_handle);
390 ret = rotate_truncate_stream(stream);
391 if (ret) {
392 ERR("Failed to truncate stream");
393 goto end;
394 }
395 } else {
396 ret = stream_rotate_data_file(stream);
397 }
398
399 end:
400 return ret;
401 }
402
403 /*
404 * Close the current index file if it is open, and create a new one.
405 *
406 * Return 0 on success, -1 on error.
407 */
408 static int create_index_file(struct relay_stream *stream,
409 struct lttng_trace_chunk *chunk)
410 {
411 int ret;
412 uint32_t major, minor;
413 char *index_subpath = NULL;
414 enum lttng_trace_chunk_status status;
415
416 ASSERT_LOCKED(stream->lock);
417
418 /* Put ref on previous index_file. */
419 if (stream->index_file) {
420 lttng_index_file_put(stream->index_file);
421 stream->index_file = NULL;
422 }
423 major = stream->trace->session->major;
424 minor = stream->trace->session->minor;
425
426 if (!chunk) {
427 ret = 0;
428 goto end;
429 }
430 ret = asprintf(&index_subpath, "%s/%s", stream->path_name,
431 DEFAULT_INDEX_DIR);
432 if (ret < 0) {
433 goto end;
434 }
435
436 status = lttng_trace_chunk_create_subdirectory(chunk,
437 index_subpath);
438 free(index_subpath);
439 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
440 ret = -1;
441 goto end;
442 }
443 status = lttng_index_file_create_from_trace_chunk(
444 chunk, stream->path_name,
445 stream->channel_name, stream->tracefile_size,
446 stream->tracefile_current_index,
447 lttng_to_index_major(major, minor),
448 lttng_to_index_minor(major, minor), true,
449 &stream->index_file);
450 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
451 ret = -1;
452 goto end;
453 }
454
455 ret = 0;
456
457 end:
458 return ret;
459 }
460
461 /*
462 * Check if a stream's index file should be rotated (for session rotation).
463 * Must be called with the stream lock held.
464 *
465 * Return 0 on success, a negative value on error.
466 */
467 static int try_rotate_stream_index(struct relay_stream *stream)
468 {
469 int ret = 0;
470
471 if (!stream->ongoing_rotation.is_set) {
472 /* No rotation expected. */
473 goto end;
474 }
475
476 if (stream->ongoing_rotation.value.index_rotated) {
477 /* Rotation of the index has already occurred. */
478 goto end;
479 }
480
481 DBG("%s: Stream %" PRIu64
482 " (rotate_at_packet_seq_num = %" PRIu64
483 ", received_packet_seq_num = "
484 "(value = %" PRIu64 ", is_set = %" PRIu8 "))",
485 __func__, stream->stream_handle,
486 stream->ongoing_rotation.value.packet_seq_num,
487 stream->received_packet_seq_num.value,
488 stream->received_packet_seq_num.is_set);
489
490 if (!stream->received_packet_seq_num.is_set ||
491 LTTNG_OPTIONAL_GET(stream->received_packet_seq_num) + 1 <
492 stream->ongoing_rotation.value.packet_seq_num) {
493 DBG("Stream %" PRIu64 " index not yet ready for rotation "
494 "(rotate_at_packet_seq_num = %" PRIu64
495 ", received_packet_seq_num = "
496 "(value = %" PRIu64 ", is_set = %" PRIu8 "))",
497 stream->stream_handle,
498 stream->ongoing_rotation.value.packet_seq_num,
499 stream->received_packet_seq_num.value,
500 stream->received_packet_seq_num.is_set);
501 goto end;
502 } else {
503 /*
504 * The next index belongs to the new trace chunk; rotate.
505 * In overwrite mode, the packet seq num may jump over the
506 * rotation position.
507 */
508 assert(LTTNG_OPTIONAL_GET(stream->received_packet_seq_num) + 1 >=
509 stream->ongoing_rotation.value.packet_seq_num);
510 DBG("Rotating stream %" PRIu64 " index file",
511 stream->stream_handle);
512 if (stream->index_file) {
513 lttng_index_file_put(stream->index_file);
514 stream->index_file = NULL;
515 }
516 stream->ongoing_rotation.value.index_rotated = true;
517
518 /*
519 * Set the rotation pivot position for the data, now that we have the
520 * net_seq_num matching the packet_seq_num index pivot position.
521 */
522 stream->ongoing_rotation.value.prev_data_net_seq =
523 stream->prev_index_seq;
524 if (stream->ongoing_rotation.value.data_rotated &&
525 stream->ongoing_rotation.value.index_rotated) {
526 /* Rotation completed; reset its state. */
527 DBG("Rotation completed for stream %" PRIu64,
528 stream->stream_handle);
529 stream_complete_rotation(stream);
530 }
531 }
532
533 end:
534 return ret;
535 }
536
537 static int stream_set_trace_chunk(struct relay_stream *stream,
538 struct lttng_trace_chunk *chunk)
539 {
540 int ret = 0;
541 enum lttng_trace_chunk_status status;
542 bool acquired_reference;
543
544 status = lttng_trace_chunk_create_subdirectory(chunk,
545 stream->path_name);
546 if (status != LTTNG_TRACE_CHUNK_STATUS_OK) {
547 ret = -1;
548 goto end;
549 }
550
551 lttng_trace_chunk_put(stream->trace_chunk);
552 acquired_reference = lttng_trace_chunk_get(chunk);
553 assert(acquired_reference);
554 stream->trace_chunk = chunk;
555
556 if (stream->file) {
557 fs_handle_close(stream->file);
558 stream->file = NULL;
559 }
560 ret = stream_create_data_output_file_from_trace_chunk(stream, chunk,
561 false, &stream->file);
562 end:
563 return ret;
564 }
565
566 /*
567 * We keep ownership of path_name and channel_name.
568 */
569 struct relay_stream *stream_create(struct ctf_trace *trace,
570 uint64_t stream_handle, char *path_name,
571 char *channel_name, uint64_t tracefile_size,
572 uint64_t tracefile_count)
573 {
574 int ret;
575 struct relay_stream *stream = NULL;
576 struct relay_session *session = trace->session;
577 bool acquired_reference = false;
578 struct lttng_trace_chunk *current_trace_chunk;
579
580 stream = zmalloc(sizeof(struct relay_stream));
581 if (stream == NULL) {
582 PERROR("relay stream zmalloc");
583 goto error_no_alloc;
584 }
585
586 stream->stream_handle = stream_handle;
587 stream->prev_data_seq = -1ULL;
588 stream->prev_index_seq = -1ULL;
589 stream->last_net_seq_num = -1ULL;
590 stream->ctf_stream_id = -1ULL;
591 stream->tracefile_size = tracefile_size;
592 stream->tracefile_count = tracefile_count;
593 stream->path_name = path_name;
594 stream->channel_name = channel_name;
595 stream->beacon_ts_end = -1ULL;
596 lttng_ht_node_init_u64(&stream->node, stream->stream_handle);
597 pthread_mutex_init(&stream->lock, NULL);
598 urcu_ref_init(&stream->ref);
599 ctf_trace_get(trace);
600 stream->trace = trace;
601
602 pthread_mutex_lock(&trace->session->lock);
603 current_trace_chunk = trace->session->current_trace_chunk;
604 if (current_trace_chunk) {
605 acquired_reference = lttng_trace_chunk_get(current_trace_chunk);
606 }
607 pthread_mutex_unlock(&trace->session->lock);
608 if (!acquired_reference) {
609 ERR("Cannot create stream for channel \"%s\" as a reference to the session's current trace chunk could not be acquired",
610 channel_name);
611 ret = -1;
612 goto end;
613 }
614
615 stream->indexes_ht = lttng_ht_new(0, LTTNG_HT_TYPE_U64);
616 if (!stream->indexes_ht) {
617 ERR("Cannot created indexes_ht");
618 ret = -1;
619 goto end;
620 }
621
622 pthread_mutex_lock(&stream->lock);
623 ret = stream_set_trace_chunk(stream, current_trace_chunk);
624 pthread_mutex_unlock(&stream->lock);
625 if (ret) {
626 ERR("Failed to set the current trace chunk of session \"%s\" on newly created stream of channel \"%s\"",
627 trace->session->session_name,
628 stream->channel_name);
629 ret = -1;
630 goto end;
631 }
632 stream->tfa = tracefile_array_create(stream->tracefile_count);
633 if (!stream->tfa) {
634 ret = -1;
635 goto end;
636 }
637
638 stream->is_metadata = !strcmp(stream->channel_name,
639 DEFAULT_METADATA_NAME);
640 stream->in_recv_list = true;
641
642 /*
643 * Add the stream in the recv list of the session. Once the end stream
644 * message is received, all session streams are published.
645 */
646 pthread_mutex_lock(&session->recv_list_lock);
647 cds_list_add_rcu(&stream->recv_node, &session->recv_list);
648 session->stream_count++;
649 pthread_mutex_unlock(&session->recv_list_lock);
650
651 /*
652 * Both in the ctf_trace object and the global stream ht since the data
653 * side of the relayd does not have the concept of session.
654 */
655 lttng_ht_add_unique_u64(relay_streams_ht, &stream->node);
656 stream->in_stream_ht = true;
657
658 DBG("Relay new stream added %s with ID %" PRIu64, stream->channel_name,
659 stream->stream_handle);
660 ret = 0;
661
662 end:
663 if (ret) {
664 if (stream->file) {
665 fs_handle_close(stream->file);
666 stream->file = NULL;
667 }
668 stream_put(stream);
669 stream = NULL;
670 }
671 if (acquired_reference) {
672 lttng_trace_chunk_put(current_trace_chunk);
673 }
674 return stream;
675
676 error_no_alloc:
677 /*
678 * path_name and channel_name need to be freed explicitly here
679 * because we cannot rely on stream_put().
680 */
681 free(path_name);
682 free(channel_name);
683 return NULL;
684 }
685
686 /*
687 * Called with the session lock held.
688 */
689 void stream_publish(struct relay_stream *stream)
690 {
691 struct relay_session *session;
692
693 pthread_mutex_lock(&stream->lock);
694 if (stream->published) {
695 goto unlock;
696 }
697
698 session = stream->trace->session;
699
700 pthread_mutex_lock(&session->recv_list_lock);
701 if (stream->in_recv_list) {
702 cds_list_del_rcu(&stream->recv_node);
703 stream->in_recv_list = false;
704 }
705 pthread_mutex_unlock(&session->recv_list_lock);
706
707 pthread_mutex_lock(&stream->trace->stream_list_lock);
708 cds_list_add_rcu(&stream->stream_node, &stream->trace->stream_list);
709 pthread_mutex_unlock(&stream->trace->stream_list_lock);
710
711 stream->published = true;
712 unlock:
713 pthread_mutex_unlock(&stream->lock);
714 }
715
716 /*
717 * Stream must be protected by holding the stream lock or by virtue of being
718 * called from stream_destroy.
719 */
720 static void stream_unpublish(struct relay_stream *stream)
721 {
722 if (stream->in_stream_ht) {
723 struct lttng_ht_iter iter;
724 int ret;
725
726 iter.iter.node = &stream->node.node;
727 ret = lttng_ht_del(relay_streams_ht, &iter);
728 assert(!ret);
729 stream->in_stream_ht = false;
730 }
731 if (stream->published) {
732 pthread_mutex_lock(&stream->trace->stream_list_lock);
733 cds_list_del_rcu(&stream->stream_node);
734 pthread_mutex_unlock(&stream->trace->stream_list_lock);
735 stream->published = false;
736 }
737 }
738
739 static void stream_destroy(struct relay_stream *stream)
740 {
741 if (stream->indexes_ht) {
742 /*
743 * Calling lttng_ht_destroy in call_rcu worker thread so
744 * we don't hold the RCU read-side lock while calling
745 * it.
746 */
747 lttng_ht_destroy(stream->indexes_ht);
748 }
749 if (stream->tfa) {
750 tracefile_array_destroy(stream->tfa);
751 }
752 free(stream->path_name);
753 free(stream->channel_name);
754 free(stream);
755 }
756
757 static void stream_destroy_rcu(struct rcu_head *rcu_head)
758 {
759 struct relay_stream *stream =
760 caa_container_of(rcu_head, struct relay_stream, rcu_node);
761
762 stream_destroy(stream);
763 }
764
765 /*
766 * No need to take stream->lock since this is only called on the final
767 * stream_put which ensures that a single thread may act on the stream.
768 */
769 static void stream_release(struct urcu_ref *ref)
770 {
771 struct relay_stream *stream =
772 caa_container_of(ref, struct relay_stream, ref);
773 struct relay_session *session;
774
775 session = stream->trace->session;
776
777 DBG("Releasing stream id %" PRIu64, stream->stream_handle);
778
779 pthread_mutex_lock(&session->recv_list_lock);
780 session->stream_count--;
781 if (stream->in_recv_list) {
782 cds_list_del_rcu(&stream->recv_node);
783 stream->in_recv_list = false;
784 }
785 pthread_mutex_unlock(&session->recv_list_lock);
786
787 stream_unpublish(stream);
788
789 if (stream->file) {
790 fs_handle_close(stream->file);
791 stream->file = NULL;
792 }
793 if (stream->index_file) {
794 lttng_index_file_put(stream->index_file);
795 stream->index_file = NULL;
796 }
797 if (stream->trace) {
798 ctf_trace_put(stream->trace);
799 stream->trace = NULL;
800 }
801 stream_complete_rotation(stream);
802 lttng_trace_chunk_put(stream->trace_chunk);
803 stream->trace_chunk = NULL;
804
805 call_rcu(&stream->rcu_node, stream_destroy_rcu);
806 }
807
808 void stream_put(struct relay_stream *stream)
809 {
810 rcu_read_lock();
811 assert(stream->ref.refcount != 0);
812 /*
813 * Wait until we have processed all the stream packets before
814 * actually putting our last stream reference.
815 */
816 urcu_ref_put(&stream->ref, stream_release);
817 rcu_read_unlock();
818 }
819
820 int stream_set_pending_rotation(struct relay_stream *stream,
821 struct lttng_trace_chunk *next_trace_chunk,
822 uint64_t rotation_sequence_number)
823 {
824 int ret = 0;
825 const struct relay_stream_rotation rotation = {
826 .data_rotated = false,
827 .index_rotated = false,
828 .packet_seq_num = rotation_sequence_number,
829 .prev_data_net_seq = -1ULL,
830 .next_trace_chunk = next_trace_chunk,
831 };
832
833 if (stream->ongoing_rotation.is_set) {
834 ERR("Attempted to set a pending rotation on a stream already being rotated (protocol error)");
835 ret = -1;
836 goto end;
837 }
838
839 if (next_trace_chunk) {
840 const bool reference_acquired =
841 lttng_trace_chunk_get(next_trace_chunk);
842
843 assert(reference_acquired);
844 }
845 LTTNG_OPTIONAL_SET(&stream->ongoing_rotation, rotation);
846
847 DBG("Setting pending rotation: stream_id = %" PRIu64
848 ", rotate_at_packet_seq_num = %" PRIu64,
849 stream->stream_handle, rotation_sequence_number);
850 if (stream->is_metadata) {
851 /*
852 * A metadata stream has no index; consider it already rotated.
853 */
854 stream->ongoing_rotation.value.index_rotated = true;
855 if (next_trace_chunk) {
856 /*
857 * The metadata will be received again in the new chunk.
858 */
859 stream->metadata_received = 0;
860 }
861 ret = stream_rotate_data_file(stream);
862 } else {
863 ret = try_rotate_stream_index(stream);
864 if (ret < 0) {
865 goto end;
866 }
867
868 ret = try_rotate_stream_data(stream);
869 if (ret < 0) {
870 goto end;
871 }
872 }
873 end:
874 return ret;
875 }
876
877 void try_stream_close(struct relay_stream *stream)
878 {
879 bool session_aborted;
880 struct relay_session *session = stream->trace->session;
881
882 DBG("Trying to close stream %" PRIu64, stream->stream_handle);
883
884 pthread_mutex_lock(&session->lock);
885 session_aborted = session->aborted;
886 pthread_mutex_unlock(&session->lock);
887
888 pthread_mutex_lock(&stream->lock);
889 /*
890 * Can be called concurently by connection close and reception of last
891 * pending data.
892 */
893 if (stream->closed) {
894 pthread_mutex_unlock(&stream->lock);
895 DBG("closing stream %" PRIu64 " aborted since it is already marked as closed", stream->stream_handle);
896 return;
897 }
898
899 stream->close_requested = true;
900
901 if (stream->last_net_seq_num == -1ULL) {
902 /*
903 * Handle connection close without explicit stream close
904 * command.
905 *
906 * We can be clever about indexes partially received in
907 * cases where we received the data socket part, but not
908 * the control socket part: since we're currently closing
909 * the stream on behalf of the control socket, we *know*
910 * there won't be any more control information for this
911 * socket. Therefore, we can destroy all indexes for
912 * which we have received only the file descriptor (from
913 * data socket). This takes care of consumerd crashes
914 * between sending the data and control information for
915 * a packet. Since those are sent in that order, we take
916 * care of consumerd crashes.
917 */
918 DBG("relay_index_close_partial_fd");
919 relay_index_close_partial_fd(stream);
920 /*
921 * Use the highest net_seq_num we currently have pending
922 * As end of stream indicator. Leave last_net_seq_num
923 * at -1ULL if we cannot find any index.
924 */
925 stream->last_net_seq_num = relay_index_find_last(stream);
926 DBG("Updating stream->last_net_seq_num to %" PRIu64, stream->last_net_seq_num);
927 /* Fall-through into the next check. */
928 }
929
930 if (stream->last_net_seq_num != -1ULL &&
931 ((int64_t) (stream->prev_data_seq - stream->last_net_seq_num)) < 0
932 && !session_aborted) {
933 /*
934 * Don't close since we still have data pending. This
935 * handles cases where an explicit close command has
936 * been received for this stream, and cases where the
937 * connection has been closed, and we are awaiting for
938 * index information from the data socket. It is
939 * therefore expected that all the index fd information
940 * we need has already been received on the control
941 * socket. Matching index information from data socket
942 * should be Expected Soon(TM).
943 *
944 * TODO: We should implement a timer to garbage collect
945 * streams after a timeout to be resilient against a
946 * consumerd implementation that would not match this
947 * expected behavior.
948 */
949 pthread_mutex_unlock(&stream->lock);
950 DBG("closing stream %" PRIu64 " aborted since it still has data pending", stream->stream_handle);
951 return;
952 }
953 /*
954 * We received all the indexes we can expect.
955 */
956 stream_unpublish(stream);
957 stream->closed = true;
958 /* Relay indexes are only used by the "consumer/sessiond" end. */
959 relay_index_close_all(stream);
960
961 /*
962 * If we are closed by an application exiting (per-pid buffers),
963 * we need to put our reference on the stream trace chunk right
964 * away, because otherwise still holding the reference on the
965 * trace chunk could allow a viewer stream (which holds a reference
966 * to the stream) to postpone destroy waiting for the chunk to cease
967 * to exist endlessly until the viewer is detached.
968 */
969
970 /* Put stream fd before put chunk. */
971 if (stream->file) {
972 fs_handle_close(stream->file);
973 stream->file = NULL;
974 }
975 if (stream->index_file) {
976 lttng_index_file_put(stream->index_file);
977 stream->index_file = NULL;
978 }
979 lttng_trace_chunk_put(stream->trace_chunk);
980 stream->trace_chunk = NULL;
981 pthread_mutex_unlock(&stream->lock);
982 DBG("Succeeded in closing stream %" PRIu64, stream->stream_handle);
983 stream_put(stream);
984 }
985
986 int stream_init_packet(struct relay_stream *stream, size_t packet_size,
987 bool *file_rotated)
988 {
989 int ret = 0;
990
991 ASSERT_LOCKED(stream->lock);
992
993 if (!stream->file || !stream->trace_chunk) {
994 ERR("Protocol error: received a packet for a stream that doesn't have a current trace chunk: stream_id = %" PRIu64 ", channel_name = %s",
995 stream->stream_handle, stream->channel_name);
996 ret = -1;
997 goto end;
998 }
999
1000 if (caa_likely(stream->tracefile_size == 0)) {
1001 /* No size limit set; nothing to check. */
1002 goto end;
1003 }
1004
1005 /*
1006 * Check if writing the new packet would exceed the maximal file size.
1007 */
1008 if (caa_unlikely((stream->tracefile_size_current + packet_size) >
1009 stream->tracefile_size)) {
1010 const uint64_t new_file_index =
1011 (stream->tracefile_current_index + 1) %
1012 stream->tracefile_count;
1013
1014 if (new_file_index < stream->tracefile_current_index) {
1015 stream->tracefile_wrapped_around = true;
1016 }
1017 DBG("New stream packet causes stream file rotation: stream_id = %" PRIu64
1018 ", current_file_size = %" PRIu64
1019 ", packet_size = %zu, current_file_index = %" PRIu64
1020 " new_file_index = %" PRIu64,
1021 stream->stream_handle,
1022 stream->tracefile_size_current, packet_size,
1023 stream->tracefile_current_index, new_file_index);
1024 tracefile_array_file_rotate(stream->tfa, TRACEFILE_ROTATE_WRITE);
1025 stream->tracefile_current_index = new_file_index;
1026
1027 if (stream->file) {
1028 fs_handle_close(stream->file);
1029 stream->file = NULL;
1030 }
1031 ret = stream_create_data_output_file_from_trace_chunk(stream,
1032 stream->trace_chunk, false, &stream->file);
1033 if (ret) {
1034 ERR("Failed to perform trace file rotation of stream %" PRIu64,
1035 stream->stream_handle);
1036 goto end;
1037 }
1038
1039 /*
1040 * Reset current size because we just performed a stream
1041 * rotation.
1042 */
1043 DBG("%s: reset tracefile_size_current for stream %" PRIu64 " was %" PRIu64,
1044 __func__, stream->stream_handle, stream->tracefile_size_current);
1045 stream->tracefile_size_current = 0;
1046 *file_rotated = true;
1047 } else {
1048 *file_rotated = false;
1049 }
1050 end:
1051 return ret;
1052 }
1053
1054 /* Note that the packet is not necessarily complete. */
1055 int stream_write(struct relay_stream *stream,
1056 const struct lttng_buffer_view *packet, size_t padding_len)
1057 {
1058 int ret = 0;
1059 ssize_t write_ret;
1060 size_t padding_to_write = padding_len;
1061 char padding_buffer[FILE_IO_STACK_BUFFER_SIZE];
1062
1063 ASSERT_LOCKED(stream->lock);
1064 memset(padding_buffer, 0,
1065 min(sizeof(padding_buffer), padding_to_write));
1066
1067 if (!stream->file || !stream->trace_chunk) {
1068 ERR("Protocol error: received a packet for a stream that doesn't have a current trace chunk: stream_id = %" PRIu64 ", channel_name = %s",
1069 stream->stream_handle, stream->channel_name);
1070 ret = -1;
1071 goto end;
1072 }
1073 if (packet) {
1074 write_ret = fs_handle_write(
1075 stream->file, packet->data, packet->size);
1076 if (write_ret != packet->size) {
1077 PERROR("Failed to write to stream file of %sstream %" PRIu64,
1078 stream->is_metadata ? "metadata " : "",
1079 stream->stream_handle);
1080 ret = -1;
1081 goto end;
1082 }
1083 }
1084
1085 while (padding_to_write > 0) {
1086 const size_t padding_to_write_this_pass =
1087 min(padding_to_write, sizeof(padding_buffer));
1088
1089 write_ret = fs_handle_write(stream->file, padding_buffer,
1090 padding_to_write_this_pass);
1091 if (write_ret != padding_to_write_this_pass) {
1092 PERROR("Failed to write padding to file of %sstream %" PRIu64,
1093 stream->is_metadata ? "metadata " : "",
1094 stream->stream_handle);
1095 ret = -1;
1096 goto end;
1097 }
1098 padding_to_write -= padding_to_write_this_pass;
1099 }
1100
1101 if (stream->is_metadata) {
1102 size_t recv_len;
1103
1104 recv_len = packet ? packet->size : 0;
1105 recv_len += padding_len;
1106 stream->metadata_received += recv_len;
1107 if (recv_len) {
1108 stream->no_new_metadata_notified = false;
1109 }
1110 }
1111
1112 DBG("Wrote to %sstream %" PRIu64 ": data_length = %zu, padding_length = %zu",
1113 stream->is_metadata ? "metadata " : "",
1114 stream->stream_handle,
1115 packet ? packet->size : (size_t) 0, padding_len);
1116 end:
1117 return ret;
1118 }
1119
1120 /*
1121 * Update index after receiving a packet for a data stream.
1122 *
1123 * Called with the stream lock held.
1124 *
1125 * Return 0 on success else a negative value.
1126 */
1127 int stream_update_index(struct relay_stream *stream, uint64_t net_seq_num,
1128 bool rotate_index, bool *flushed, uint64_t total_size)
1129 {
1130 int ret = 0;
1131 uint64_t data_offset;
1132 struct relay_index *index;
1133
1134 assert(stream->trace_chunk);
1135 ASSERT_LOCKED(stream->lock);
1136 /* Get data offset because we are about to update the index. */
1137 data_offset = htobe64(stream->tracefile_size_current);
1138
1139 DBG("handle_index_data: stream %" PRIu64 " net_seq_num %" PRIu64 " data offset %" PRIu64,
1140 stream->stream_handle, net_seq_num, stream->tracefile_size_current);
1141
1142 /*
1143 * Lookup for an existing index for that stream id/sequence
1144 * number. If it exists, the control thread has already received the
1145 * data for it, thus we need to write it to disk.
1146 */
1147 index = relay_index_get_by_id_or_create(stream, net_seq_num);
1148 if (!index) {
1149 ret = -1;
1150 goto end;
1151 }
1152
1153 if (rotate_index || !stream->index_file) {
1154 ret = create_index_file(stream, stream->trace_chunk);
1155 if (ret) {
1156 ERR("Failed to create index file for stream %" PRIu64,
1157 stream->stream_handle);
1158 /* Put self-ref for this index due to error. */
1159 relay_index_put(index);
1160 index = NULL;
1161 goto end;
1162 }
1163 }
1164
1165 if (relay_index_set_file(index, stream->index_file, data_offset)) {
1166 ret = -1;
1167 /* Put self-ref for this index due to error. */
1168 relay_index_put(index);
1169 index = NULL;
1170 goto end;
1171 }
1172
1173 ret = relay_index_try_flush(index);
1174 if (ret == 0) {
1175 tracefile_array_file_rotate(stream->tfa, TRACEFILE_ROTATE_READ);
1176 tracefile_array_commit_seq(stream->tfa, stream->index_received_seqcount);
1177 stream->index_received_seqcount++;
1178 LTTNG_OPTIONAL_SET(&stream->received_packet_seq_num,
1179 be64toh(index->index_data.packet_seq_num));
1180 *flushed = true;
1181 } else if (ret > 0) {
1182 index->total_size = total_size;
1183 /* No flush. */
1184 ret = 0;
1185 } else {
1186 /*
1187 * ret < 0
1188 *
1189 * relay_index_try_flush is responsible for the self-reference
1190 * put of the index object on error.
1191 */
1192 ERR("relay_index_try_flush error %d", ret);
1193 ret = -1;
1194 }
1195 end:
1196 return ret;
1197 }
1198
1199 int stream_complete_packet(struct relay_stream *stream, size_t packet_total_size,
1200 uint64_t sequence_number, bool index_flushed)
1201 {
1202 int ret = 0;
1203
1204 ASSERT_LOCKED(stream->lock);
1205
1206 stream->tracefile_size_current += packet_total_size;
1207 if (index_flushed) {
1208 stream->pos_after_last_complete_data_index =
1209 stream->tracefile_size_current;
1210 stream->prev_index_seq = sequence_number;
1211 ret = try_rotate_stream_index(stream);
1212 if (ret < 0) {
1213 goto end;
1214 }
1215 }
1216
1217 stream->prev_data_seq = sequence_number;
1218 ret = try_rotate_stream_data(stream);
1219
1220 end:
1221 return ret;
1222 }
1223
1224 int stream_add_index(struct relay_stream *stream,
1225 const struct lttcomm_relayd_index *index_info)
1226 {
1227 int ret = 0;
1228 struct relay_index *index;
1229
1230 ASSERT_LOCKED(stream->lock);
1231
1232 DBG("stream_add_index for stream %" PRIu64, stream->stream_handle);
1233
1234 /* Live beacon handling */
1235 if (index_info->packet_size == 0) {
1236 DBG("Received live beacon for stream %" PRIu64,
1237 stream->stream_handle);
1238
1239 /*
1240 * Only flag a stream inactive when it has already
1241 * received data and no indexes are in flight.
1242 */
1243 if (stream->index_received_seqcount > 0
1244 && stream->indexes_in_flight == 0) {
1245 stream->beacon_ts_end = index_info->timestamp_end;
1246 }
1247 ret = 0;
1248 goto end;
1249 } else {
1250 stream->beacon_ts_end = -1ULL;
1251 }
1252
1253 if (stream->ctf_stream_id == -1ULL) {
1254 stream->ctf_stream_id = index_info->stream_id;
1255 }
1256
1257 index = relay_index_get_by_id_or_create(stream, index_info->net_seq_num);
1258 if (!index) {
1259 ret = -1;
1260 ERR("Failed to get or create index %" PRIu64,
1261 index_info->net_seq_num);
1262 goto end;
1263 }
1264 if (relay_index_set_control_data(index, index_info,
1265 stream->trace->session->minor)) {
1266 ERR("set_index_control_data error");
1267 relay_index_put(index);
1268 ret = -1;
1269 goto end;
1270 }
1271 ret = relay_index_try_flush(index);
1272 if (ret == 0) {
1273 tracefile_array_file_rotate(stream->tfa, TRACEFILE_ROTATE_READ);
1274 tracefile_array_commit_seq(stream->tfa, stream->index_received_seqcount);
1275 stream->index_received_seqcount++;
1276 stream->pos_after_last_complete_data_index += index->total_size;
1277 stream->prev_index_seq = index_info->net_seq_num;
1278 LTTNG_OPTIONAL_SET(&stream->received_packet_seq_num,
1279 index_info->packet_seq_num);
1280
1281 ret = try_rotate_stream_index(stream);
1282 if (ret < 0) {
1283 goto end;
1284 }
1285 ret = try_rotate_stream_data(stream);
1286 if (ret < 0) {
1287 goto end;
1288 }
1289 } else if (ret > 0) {
1290 /* no flush. */
1291 ret = 0;
1292 } else {
1293 /*
1294 * ret < 0
1295 *
1296 * relay_index_try_flush is responsible for the self-reference
1297 * put of the index object on error.
1298 */
1299 ERR("relay_index_try_flush error %d", ret);
1300 ret = -1;
1301 }
1302 end:
1303 return ret;
1304 }
1305
1306 static void print_stream_indexes(struct relay_stream *stream)
1307 {
1308 struct lttng_ht_iter iter;
1309 struct relay_index *index;
1310
1311 rcu_read_lock();
1312 cds_lfht_for_each_entry(stream->indexes_ht->ht, &iter.iter, index,
1313 index_n.node) {
1314 DBG("index %p net_seq_num %" PRIu64 " refcount %ld"
1315 " stream %" PRIu64 " trace %" PRIu64
1316 " session %" PRIu64,
1317 index,
1318 index->index_n.key,
1319 stream->ref.refcount,
1320 index->stream->stream_handle,
1321 index->stream->trace->id,
1322 index->stream->trace->session->id);
1323 }
1324 rcu_read_unlock();
1325 }
1326
1327 int stream_reset_file(struct relay_stream *stream)
1328 {
1329 ASSERT_LOCKED(stream->lock);
1330
1331 if (stream->file) {
1332 int ret;
1333
1334 ret = fs_handle_close(stream->file);
1335 if (ret) {
1336 ERR("Failed to close stream file handle: channel name = \"%s\", id = %" PRIu64,
1337 stream->channel_name,
1338 stream->stream_handle);
1339 }
1340 stream->file = NULL;
1341 }
1342
1343 DBG("%s: reset tracefile_size_current for stream %" PRIu64 " was %" PRIu64,
1344 __func__, stream->stream_handle, stream->tracefile_size_current);
1345 stream->tracefile_size_current = 0;
1346 stream->prev_data_seq = 0;
1347 stream->prev_index_seq = 0;
1348 /* Note that this does not reset the tracefile array. */
1349 stream->tracefile_current_index = 0;
1350 stream->pos_after_last_complete_data_index = 0;
1351
1352 return stream_create_data_output_file_from_trace_chunk(stream,
1353 stream->trace_chunk, true, &stream->file);
1354 }
1355
1356 void print_relay_streams(void)
1357 {
1358 struct lttng_ht_iter iter;
1359 struct relay_stream *stream;
1360
1361 if (!relay_streams_ht) {
1362 return;
1363 }
1364
1365 rcu_read_lock();
1366 cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, stream,
1367 node.node) {
1368 if (!stream_get(stream)) {
1369 continue;
1370 }
1371 DBG("stream %p refcount %ld stream %" PRIu64 " trace %" PRIu64
1372 " session %" PRIu64,
1373 stream,
1374 stream->ref.refcount,
1375 stream->stream_handle,
1376 stream->trace->id,
1377 stream->trace->session->id);
1378 print_stream_indexes(stream);
1379 stream_put(stream);
1380 }
1381 rcu_read_unlock();
1382 }
This page took 0.093883 seconds and 3 git commands to generate.