X-Git-Url: https://git.lttng.org/?a=blobdiff_plain;f=src%2Fbin%2Flttng-relayd%2Flive.cpp;h=b84a12f35b8d791a5ea204c36a3d2ff92ffa97ae;hb=7e4fb89bb6323adc132f018184bb159fd8dd7727;hp=92c3bf4a7831d8ac6195009e08af8dc5e6fceee0;hpb=ac497a37018f3c253d2e50397294f58d33f7f24f;p=lttng-tools.git diff --git a/src/bin/lttng-relayd/live.cpp b/src/bin/lttng-relayd/live.cpp index 92c3bf4a7..b84a12f35 100644 --- a/src/bin/lttng-relayd/live.cpp +++ b/src/bin/lttng-relayd/live.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -88,6 +89,55 @@ static uint64_t last_relay_viewer_session_id; static pthread_mutex_t last_relay_viewer_session_id_lock = PTHREAD_MUTEX_INITIALIZER; +static +const char *lttng_viewer_command_str(lttng_viewer_command cmd) +{ + switch (cmd) { + case LTTNG_VIEWER_CONNECT: + return "CONNECT"; + case LTTNG_VIEWER_LIST_SESSIONS: + return "LIST_SESSIONS"; + case LTTNG_VIEWER_ATTACH_SESSION: + return "ATTACH_SESSION"; + case LTTNG_VIEWER_GET_NEXT_INDEX: + return "GET_NEXT_INDEX"; + case LTTNG_VIEWER_GET_PACKET: + return "GET_PACKET"; + case LTTNG_VIEWER_GET_METADATA: + return "GET_METADATA"; + case LTTNG_VIEWER_GET_NEW_STREAMS: + return "GET_NEW_STREAMS"; + case LTTNG_VIEWER_CREATE_SESSION: + return "CREATE_SESSION"; + case LTTNG_VIEWER_DETACH_SESSION: + return "DETACH_SESSION"; + default: + abort(); + } +} + +static +const char *lttng_viewer_next_index_return_code_str( + enum lttng_viewer_next_index_return_code code) +{ + switch (code) { + case LTTNG_VIEWER_INDEX_OK: + return "INDEX_OK"; + case LTTNG_VIEWER_INDEX_RETRY: + return "INDEX_RETRY"; + case LTTNG_VIEWER_INDEX_HUP: + return "INDEX_HUP"; + case LTTNG_VIEWER_INDEX_ERR: + return "INDEX_ERR"; + case LTTNG_VIEWER_INDEX_INACTIVE: + return "INDEX_INACTIVE"; + case LTTNG_VIEWER_INDEX_EOF: + return "INDEX_EOF"; + default: + abort(); + } +} + /* * Cleanup the daemon */ @@ -936,8 +986,6 @@ int viewer_connect(struct relay_connection *conn) health_code_update(); - DBG("Viewer is establishing a connection to the relayd."); - ret = recv_request(conn->sock, &msg, sizeof(msg)); if (ret < 0) { goto end; @@ -1024,8 +1072,6 @@ int viewer_list_sessions(struct relay_connection *conn) uint32_t buf_count = SESSION_BUF_DEFAULT_COUNT; uint32_t count = 0; - DBG("List sessions received"); - send_session_buf = (lttng_viewer_session *) zmalloc(SESSION_BUF_DEFAULT_COUNT * sizeof(*send_session_buf)); if (!send_session_buf) { return -1; @@ -1130,8 +1176,6 @@ int viewer_get_new_streams(struct relay_connection *conn) LTTNG_ASSERT(conn); - DBG("Get new streams received"); - health_code_update(); /* Receive the request from the connected client. */ @@ -1167,6 +1211,16 @@ int viewer_get_new_streams(struct relay_connection *conn) * the viewer's point of view. */ pthread_mutex_lock(&session->lock); + /* + * If a session rotation is ongoing, do not attempt to open any + * stream, because the chunk can be in an intermediate state + * due to directory renaming. + */ + if (session->ongoing_rotation) { + DBG("Relay session %" PRIu64 " rotation ongoing", session_id); + response.status = htobe32(LTTNG_VIEWER_NEW_STREAMS_NO_NEW); + goto send_reply_unlock; + } ret = make_viewer_streams(session, conn->viewer_session, LTTNG_VIEWER_SEEK_BEGINNING, &nb_total, &nb_unsent, @@ -1307,6 +1361,17 @@ int viewer_attach_session(struct relay_connection *conn) goto send_reply; } + /* + * If a session rotation is ongoing, do not attempt to open any + * stream, because the chunk can be in an intermediate state + * due to directory renaming. + */ + if (session->ongoing_rotation) { + DBG("Relay session %" PRIu64 " rotation ongoing", session_id); + send_streams = 0; + goto send_reply; + } + ret = make_viewer_streams(session, conn->viewer_session, seek_type, &nb_streams, NULL, NULL, &closed); @@ -1443,7 +1508,13 @@ static int check_index_status(struct relay_viewer_stream *vstream, * Last index sent and session connection or relay * stream are closed. */ - index->status = htobe32(LTTNG_VIEWER_INDEX_HUP); + index->status = LTTNG_VIEWER_INDEX_HUP; + DBG("Check index status: Connection or stream are closed, stream %" PRIu64 + ",connection-closed=%d, relay-stream-closed=%d, returning status=%s", + vstream->stream->stream_handle, + trace->session->connection_closed, rstream->closed, + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) index->status)); goto hup; } else if (rstream->beacon_ts_end != -1ULL && (rstream->index_received_seqcount == 0 || @@ -1465,11 +1536,14 @@ static int check_index_status(struct relay_viewer_stream *vstream, * viewer_stream_sync_tracefile_array_tail) and skip over * packet sequence numbers. */ - index->status = htobe32(LTTNG_VIEWER_INDEX_INACTIVE); + index->status = LTTNG_VIEWER_INDEX_INACTIVE; index->timestamp_end = htobe64(rstream->beacon_ts_end); index->stream_id = htobe64(rstream->ctf_stream_id); - DBG("Check index status: inactive with beacon, for stream %" PRIu64, - vstream->stream->stream_handle); + DBG("Check index status: inactive with beacon, for stream %" PRIu64 + ", returning status=%s", + vstream->stream->stream_handle, + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) index->status)); goto index_ready; } else if (rstream->index_received_seqcount == 0 || (vstream->index_sent_seqcount != 0 && @@ -1486,9 +1560,13 @@ static int check_index_status(struct relay_viewer_stream *vstream, * viewer_stream_sync_tracefile_array_tail) and skip over * packet sequence numbers. */ - index->status = htobe32(LTTNG_VIEWER_INDEX_RETRY); - DBG("Check index status: retry for stream %" PRIu64, - vstream->stream->stream_handle); + index->status = LTTNG_VIEWER_INDEX_RETRY; + DBG("Check index status:" + "did not received beacon for stream %" PRIu64 + ", returning status=%s", + vstream->stream->stream_handle, + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) index->status)); goto index_ready; } else if (!tracefile_array_seq_in_file(rstream->tfa, vstream->current_tracefile_id, @@ -1503,7 +1581,13 @@ static int check_index_status(struct relay_viewer_stream *vstream, ret = viewer_stream_rotate(vstream); if (ret == 1) { /* EOF across entire stream. */ - index->status = htobe32(LTTNG_VIEWER_INDEX_HUP); + index->status = LTTNG_VIEWER_INDEX_HUP; + DBG("Check index status:" + "reached end of file for stream %" PRIu64 + ", returning status=%s", + vstream->stream->stream_handle, + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) index->status)); goto hup; } /* @@ -1525,12 +1609,15 @@ static int check_index_status(struct relay_viewer_stream *vstream, rstream->tfa, vstream->current_tracefile_id, vstream->index_sent_seqcount)) { - index->status = htobe32(LTTNG_VIEWER_INDEX_RETRY); - DBG("Check index status: retry: " + index->status = LTTNG_VIEWER_INDEX_RETRY; + DBG("Check index status:" "tracefile array sequence number %" PRIu64 - " not in file for stream %" PRIu64, + " not in file for stream %" PRIu64 + ", returning status=%s", vstream->index_sent_seqcount, - vstream->stream->stream_handle); + vstream->stream->stream_handle, + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) index->status)); goto index_ready; } LTTNG_ASSERT(tracefile_array_seq_in_file(rstream->tfa, @@ -1581,11 +1668,12 @@ int viewer_get_next_index(struct relay_connection *conn) struct relay_stream *rstream = NULL; struct ctf_trace *ctf_trace = NULL; struct relay_viewer_stream *metadata_viewer_stream = NULL; + bool viewer_stream_and_session_in_same_chunk, viewer_stream_one_rotation_behind; + uint64_t stream_file_chunk_id = -1ULL, viewer_session_chunk_id = -1ULL; + enum lttng_trace_chunk_status status; LTTNG_ASSERT(conn); - DBG("Viewer get next index"); - memset(&viewer_index, 0, sizeof(viewer_index)); health_code_update(); @@ -1597,9 +1685,11 @@ int viewer_get_next_index(struct relay_connection *conn) vstream = viewer_stream_get_by_id(be64toh(request_index.stream_id)); if (!vstream) { - DBG("Client requested index of unknown stream id %" PRIu64, - (uint64_t) be64toh(request_index.stream_id)); - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR); + viewer_index.status = LTTNG_VIEWER_INDEX_ERR; + DBG("Client requested index of unknown stream id %" PRIu64", returning status=%s", + (uint64_t) be64toh(request_index.stream_id), + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) viewer_index.status)); goto send_reply; } @@ -1611,25 +1701,44 @@ int viewer_get_next_index(struct relay_connection *conn) metadata_viewer_stream = ctf_trace_get_viewer_metadata_stream(ctf_trace); + /* + * Hold the session lock to protect against concurrent changes + * to the chunk files (e.g. rename done by clear), which are + * protected by the session ongoing rotation state. Those are + * synchronized with the session lock. + */ + pthread_mutex_lock(&rstream->trace->session->lock); pthread_mutex_lock(&rstream->lock); /* * The viewer should not ask for index on metadata stream. */ if (rstream->is_metadata) { - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_HUP); + viewer_index.status = LTTNG_VIEWER_INDEX_HUP; + DBG("Client requested index of a metadata stream id %" PRIu64", returning status=%s", + (uint64_t) be64toh(request_index.stream_id), + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) viewer_index.status)); goto send_reply; } if (rstream->ongoing_rotation.is_set) { /* Rotation is ongoing, try again later. */ - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_RETRY); + viewer_index.status = LTTNG_VIEWER_INDEX_RETRY; + DBG("Client requested index for stream id %" PRIu64" while a stream rotation is ongoing, returning status=%s", + (uint64_t) be64toh(request_index.stream_id), + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) viewer_index.status)); goto send_reply; } if (rstream->trace->session->ongoing_rotation) { /* Rotation is ongoing, try again later. */ - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_RETRY); + viewer_index.status = LTTNG_VIEWER_INDEX_RETRY; + DBG("Client requested index for stream id %" PRIu64" while a session rotation is ongoing, returning status=%s", + (uint64_t) be64toh(request_index.stream_id), + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) viewer_index.status)); goto send_reply; } @@ -1645,7 +1754,12 @@ int viewer_get_next_index(struct relay_connection *conn) conn->viewer_session, rstream->trace_chunk); if (ret) { - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR); + viewer_index.status = LTTNG_VIEWER_INDEX_ERR; + ERR("Error copying trace chunk for stream id %" PRIu64 + ", returning status=%s", + (uint64_t) be64toh(request_index.stream_id), + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) viewer_index.status)); goto send_reply; } } @@ -1661,12 +1775,50 @@ int viewer_get_next_index(struct relay_connection *conn) * This allows clients to consume all the packets of a trace chunk * after a session's destruction. */ - if (conn->viewer_session->current_trace_chunk != vstream->stream_file.trace_chunk && - !(rstream->completed_rotation_count == vstream->last_seen_rotation_count + 1 && !rstream->trace_chunk)) { - DBG("Viewer session and viewer stream chunk differ: " - "vsession chunk %p vstream chunk %p", + if (vstream->stream_file.trace_chunk) { + status = lttng_trace_chunk_get_id( + vstream->stream_file.trace_chunk, + &stream_file_chunk_id); + LTTNG_ASSERT(status == LTTNG_TRACE_CHUNK_STATUS_OK); + } + if (conn->viewer_session->current_trace_chunk) { + status = lttng_trace_chunk_get_id( conn->viewer_session->current_trace_chunk, - vstream->stream_file.trace_chunk); + &viewer_session_chunk_id); + LTTNG_ASSERT(status == LTTNG_TRACE_CHUNK_STATUS_OK); + } + + viewer_stream_and_session_in_same_chunk = lttng_trace_chunk_ids_equal( + conn->viewer_session->current_trace_chunk, + vstream->stream_file.trace_chunk); + viewer_stream_one_rotation_behind = rstream->completed_rotation_count == + vstream->last_seen_rotation_count + 1; + + if (viewer_stream_and_session_in_same_chunk) { + DBG("Transition to latest chunk check (%s -> %s): Same chunk, no need to rotate", + vstream->stream_file.trace_chunk ? + std::to_string(stream_file_chunk_id).c_str() : + "None", + conn->viewer_session->current_trace_chunk ? + std::to_string(viewer_session_chunk_id).c_str() : + "None"); + } else if (viewer_stream_one_rotation_behind && !rstream->trace_chunk) { + DBG("Transition to latest chunk check (%s -> %s): One chunk behind relay stream which is being destroyed, no need to rotate", + vstream->stream_file.trace_chunk ? + std::to_string(stream_file_chunk_id).c_str() : + "None", + conn->viewer_session->current_trace_chunk ? + std::to_string(viewer_session_chunk_id).c_str() : + "None"); + } else { + DBG("Transition to latest chunk check (%s -> %s): Viewer stream chunk ID and viewer session chunk ID differ, rotating viewer stream", + vstream->stream_file.trace_chunk ? + std::to_string(stream_file_chunk_id).c_str() : + "None", + conn->viewer_session->current_trace_chunk ? + std::to_string(viewer_session_chunk_id).c_str() : + "None"); + viewer_stream_rotate_to_trace_chunk(vstream, conn->viewer_session->current_trace_chunk); vstream->last_seen_rotation_count = @@ -1690,15 +1842,30 @@ int viewer_get_next_index(struct relay_connection *conn) ret = try_open_index(vstream, rstream); if (ret == -ENOENT) { if (rstream->closed) { - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_HUP); + viewer_index.status = LTTNG_VIEWER_INDEX_HUP; + DBG("Cannot open index for stream id %" PRIu64 + "stream is closed, returning status=%s", + (uint64_t) be64toh(request_index.stream_id), + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) viewer_index.status)); goto send_reply; } else { - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_RETRY); + viewer_index.status = LTTNG_VIEWER_INDEX_RETRY; + DBG("Cannot open index for stream id %" PRIu64 + ", returning status=%s", + (uint64_t) be64toh(request_index.stream_id), + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) viewer_index.status)); goto send_reply; } } if (ret < 0) { - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR); + viewer_index.status = LTTNG_VIEWER_INDEX_ERR; + ERR("Error opening index for stream id %" PRIu64 + ", returning status=%s", + (uint64_t) be64toh(request_index.stream_id), + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) viewer_index.status)); goto send_reply; } @@ -1711,7 +1878,6 @@ int viewer_get_next_index(struct relay_connection *conn) */ if (!vstream->stream_file.handle) { char file_path[LTTNG_PATH_MAX]; - enum lttng_trace_chunk_status status; struct fs_handle *fs_handle; ret = utils_stream_file_path(rstream->path_name, @@ -1733,7 +1899,12 @@ int viewer_get_next_index(struct relay_connection *conn) if (status != LTTNG_TRACE_CHUNK_STATUS_OK) { if (status == LTTNG_TRACE_CHUNK_STATUS_NO_FILE && rstream->closed) { - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_HUP); + viewer_index.status = LTTNG_VIEWER_INDEX_HUP; + DBG("Cannot find trace chunk file and stream is closed for stream id %" PRIu64 + ", returning status=%s", + (uint64_t) be64toh(request_index.stream_id), + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) viewer_index.status)); goto send_reply; } PERROR("Failed to open trace file for viewer stream"); @@ -1744,7 +1915,12 @@ int viewer_get_next_index(struct relay_connection *conn) ret = check_new_streams(conn); if (ret < 0) { - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR); + viewer_index.status = LTTNG_VIEWER_INDEX_ERR; + ERR("Error checking for new streams before sending new index to stream id %" PRIu64 + ", returning status=%s", + (uint64_t) be64toh(request_index.stream_id), + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) viewer_index.status)); goto send_reply; } else if (ret == 1) { viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_STREAM; @@ -1752,11 +1928,20 @@ int viewer_get_next_index(struct relay_connection *conn) ret = lttng_index_file_read(vstream->index_file, &packet_index); if (ret) { - ERR("Relay error reading index file"); - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_ERR); + viewer_index.status = LTTNG_VIEWER_INDEX_ERR; + ERR("Relay error reading index file for stream id %" PRIu64 + ", returning status=%s", + (uint64_t) be64toh(request_index.stream_id), + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) viewer_index.status)); goto send_reply; } else { - viewer_index.status = htobe32(LTTNG_VIEWER_INDEX_OK); + viewer_index.status = LTTNG_VIEWER_INDEX_OK; + DBG("Read index file for stream id %" PRIu64 + ", returning status=%s", + (uint64_t) be64toh(request_index.stream_id), + lttng_viewer_next_index_return_code_str( + (enum lttng_viewer_next_index_return_code) viewer_index.status)); vstream->index_sent_seqcount++; } @@ -1777,6 +1962,7 @@ int viewer_get_next_index(struct relay_connection *conn) send_reply: if (rstream) { pthread_mutex_unlock(&rstream->lock); + pthread_mutex_unlock(&rstream->trace->session->lock); } if (metadata_viewer_stream) { @@ -1794,6 +1980,7 @@ send_reply: } viewer_index.flags = htobe32(viewer_index.flags); + viewer_index.status = htobe32(viewer_index.status); health_code_update(); ret = send_response(conn->sock, &viewer_index, sizeof(viewer_index)); @@ -1818,6 +2005,7 @@ end: error_put: pthread_mutex_unlock(&rstream->lock); + pthread_mutex_unlock(&rstream->trace->session->lock); if (metadata_viewer_stream) { viewer_stream_put(metadata_viewer_stream); } @@ -1844,8 +2032,6 @@ int viewer_get_packet(struct relay_connection *conn) ssize_t read_len; uint64_t stream_id; - DBG2("Relay get data packet"); - health_code_update(); ret = recv_request(conn->sock, &get_packet_info, @@ -1958,8 +2144,6 @@ int viewer_get_metadata(struct relay_connection *conn) LTTNG_ASSERT(conn); - DBG("Relay get metadata"); - health_code_update(); ret = recv_request(conn->sock, &request, sizeof(request)); @@ -2006,11 +2190,18 @@ int viewer_get_metadata(struct relay_connection *conn) * an error. */ if (vstream->metadata_sent > 0) { - vstream->stream->no_new_metadata_notified = true; - if (vstream->stream->closed) { - /* Release ownership for the viewer metadata stream. */ + if (vstream->stream->closed && vstream->stream->no_new_metadata_notified) { + /* + * Release ownership for the viewer metadata + * stream. Note that this reference is the + * viewer's reference. The vstream still exists + * until the end of the function as + * viewer_stream_get_by_id() took a reference. + */ viewer_stream_put(vstream); } + + vstream->stream->no_new_metadata_notified = true; } goto send_reply; } @@ -2032,8 +2223,8 @@ int viewer_get_metadata(struct relay_connection *conn) } if (conn->viewer_session->current_trace_chunk && - conn->viewer_session->current_trace_chunk != - vstream->stream_file.trace_chunk) { + !lttng_trace_chunk_ids_equal(conn->viewer_session->current_trace_chunk, + vstream->stream_file.trace_chunk)) { bool acquired_reference; DBG("Viewer session and viewer stream chunk differ: " @@ -2219,8 +2410,6 @@ int viewer_create_session(struct relay_connection *conn) int ret; struct lttng_viewer_create_session_response resp; - DBG("Viewer create session received"); - memset(&resp, 0, sizeof(resp)); resp.status = htobe32(LTTNG_VIEWER_CREATE_SESSION_OK); conn->viewer_session = viewer_session_create(); @@ -2257,8 +2446,6 @@ int viewer_detach_session(struct relay_connection *conn) struct relay_session *session = NULL; uint64_t viewer_session_to_close; - DBG("Viewer detach session received"); - LTTNG_ASSERT(conn); health_code_update(); @@ -2337,21 +2524,24 @@ int process_control(struct lttng_viewer_cmd *recv_hdr, struct relay_connection *conn) { int ret = 0; - uint32_t msg_value; - - msg_value = be32toh(recv_hdr->cmd); + lttng_viewer_command cmd = + (lttng_viewer_command) be32toh(recv_hdr->cmd); /* - * Make sure we've done the version check before any command other then a - * new client connection. + * Make sure we've done the version check before any command other then + * a new client connection. */ - if (msg_value != LTTNG_VIEWER_CONNECT && !conn->version_check_done) { - ERR("Viewer conn value %" PRIu32 " before version check", msg_value); + if (cmd != LTTNG_VIEWER_CONNECT && !conn->version_check_done) { + ERR("Viewer on connection %d requested %s command before version check", + conn->sock->fd, lttng_viewer_command_str(cmd)); ret = -1; goto end; } - switch (msg_value) { + DBG("Processing %s viewer command from connection %d", + lttng_viewer_command_str(cmd), conn->sock->fd); + + switch (cmd) { case LTTNG_VIEWER_CONNECT: ret = viewer_connect(conn); break;