Stop rotation pending check timer from the rotation thread
[lttng-tools.git] / src / bin / lttng-sessiond / rotation-thread.c
index 2dfd9735741b7f12111a5af45e6dee346bf631dd..a0b924865d3e02fe04fc5ebd5c2517cb69ee165e 100644 (file)
@@ -56,7 +56,7 @@ struct rotation_thread {
 
 /*
  * A unit of work queued for the rotation thread: a job type paired with
  * the session the job targets.
  */
 struct rotation_thread_job {
        /* Kind of job; compared together with the session to detect duplicates. */
        enum rotation_thread_job_type type;
-       uint64_t session_id;
        /*
         * Target session. rotation_thread_enqueue_job() takes a reference
         * with session_get() when the job is queued; the rotation thread
         * drops it with session_put() once the job has been run.
         */
+       struct ltt_session *session;
        /* List member in struct rotation_thread_timer_queue. */
        struct cds_list_head head;
 };
@@ -72,11 +72,9 @@ struct rotation_thread_timer_queue {
 };
 
 /*
  * State handed to the rotation thread; populated by
  * rotation_thread_handle_create().
  */
 struct rotation_thread_handle {
        /*
         * No longer stored here: the thread's poll set is initialized with
         * sessiond_set_thread_pollset() and quit activity is detected with
         * sessiond_check_thread_quit_pipe().
         */
-       int quit_pipe;
        /* Job queue given to rotation_thread_handle_create(). */
        struct rotation_thread_timer_queue *rotation_timer_queue;
        /* Access to the notification thread cmd_queue */
        struct notification_thread_handle *notification_thread_handle;
        /*
         * No longer stored here: init_thread_state() does not sem_wait() on
         * the notification thread anymore before creating its channel.
         */
-       sem_t *notification_thread_ready;
 };
 
 static
@@ -132,8 +130,8 @@ void log_job_destruction(const struct rotation_thread_job *job)
                abort();
        }
 
-       LOG(log_level, "Rotation thread timer queue still contains job of type %s targeting session %" PRIu64 " on destruction",
-                       job_type_str, job->session_id);
+       LOG(log_level, "Rotation thread timer queue still contains job of type %s targeting session \"%s\" on destruction",
+                       job_type_str, job->session->name);
 }
 
 void rotation_thread_timer_queue_destroy(
@@ -169,10 +167,8 @@ void rotation_thread_handle_destroy(
 }
 
 struct rotation_thread_handle *rotation_thread_handle_create(
-               int quit_pipe,
                struct rotation_thread_timer_queue *rotation_timer_queue,
-               struct notification_thread_handle *notification_thread_handle,
-               sem_t *notification_thread_ready)
+               struct notification_thread_handle *notification_thread_handle)
 {
        struct rotation_thread_handle *handle;
 
@@ -181,10 +177,8 @@ struct rotation_thread_handle *rotation_thread_handle_create(
                goto end;
        }
 
-       handle->quit_pipe = quit_pipe;
        handle->rotation_timer_queue = rotation_timer_queue;
        handle->notification_thread_handle = notification_thread_handle;
-       handle->notification_thread_ready = notification_thread_ready;
 
 end:
        return handle;
@@ -196,13 +190,14 @@ end:
  */
 static
 bool timer_job_exists(const struct rotation_thread_timer_queue *queue,
-               enum rotation_thread_job_type job_type, uint64_t session_id)
+               enum rotation_thread_job_type job_type,
+               struct ltt_session *session)
 {
        bool exists = false;
        struct rotation_thread_job *job;
 
        cds_list_for_each_entry(job, &queue->list, head) {
-               if (job->session_id == session_id && job->type == job_type) {
+               if (job->session == session && job->type == job_type) {
                        exists = true;
                        goto end;
                }
@@ -212,7 +207,8 @@ end:
 }
 
 void rotation_thread_enqueue_job(struct rotation_thread_timer_queue *queue,
-               enum rotation_thread_job_type job_type, uint64_t session_id)
+               enum rotation_thread_job_type job_type,
+               struct ltt_session *session)
 {
        int ret;
        const char * const dummy = "!";
@@ -220,7 +216,7 @@ void rotation_thread_enqueue_job(struct rotation_thread_timer_queue *queue,
        const char *job_type_str = get_job_type_str(job_type);
 
        pthread_mutex_lock(&queue->lock);
-       if (timer_job_exists(queue, session_id, job_type)) {
+       if (timer_job_exists(queue, job_type, session)) {
                /*
                 * This timer job is already pending, we don't need to add
                 * it.
@@ -230,12 +226,15 @@ void rotation_thread_enqueue_job(struct rotation_thread_timer_queue *queue,
 
        job = zmalloc(sizeof(struct rotation_thread_job));
        if (!job) {
-               PERROR("Failed to allocate rotation thread job of type \"%s\" for session id %" PRIu64,
-                               job_type_str, session_id);
+               PERROR("Failed to allocate rotation thread job of type \"%s\" for session \"%s\"",
+                               job_type_str, session->name);
                goto end;
        }
+       /* No reason for this to fail as the caller must hold a reference. */
+       (void) session_get(session);
+
+       job->session = session;
        job->type = job_type;
-       job->session_id = session_id;
        cds_list_add_tail(&job->head, &queue->list);
 
        ret = lttng_write(lttng_pipe_get_writefd(queue->event_pipe), dummy,
@@ -256,8 +255,8 @@ void rotation_thread_enqueue_job(struct rotation_thread_timer_queue *queue,
                        DBG("Wake-up pipe of rotation thread job queue is full");
                        goto end;
                }
-               PERROR("Failed to wake-up the rotation thread after pushing a job of type \"%s\" for session id %" PRIu64,
-                               job_type_str, session_id);
+               PERROR("Failed to wake-up the rotation thread after pushing a job of type \"%s\" for session \"%s\"",
+                               job_type_str, session->name);
                goto end;
        }
 
@@ -276,15 +275,8 @@ int init_poll_set(struct lttng_poll_event *poll_set,
         *      - quit pipe,
         *      - rotation thread timer queue pipe,
         */
-       ret = lttng_poll_create(poll_set, 2, LTTNG_CLOEXEC);
-       if (ret < 0) {
-               goto end;
-       }
-
-       ret = lttng_poll_add(poll_set, handle->quit_pipe,
-                       LPOLLIN | LPOLLERR);
-       if (ret < 0) {
-               ERR("[rotation-thread] Failed to add quit_pipe fd to pollset");
+       ret = sessiond_set_thread_pollset(poll_set, 2);
+       if (ret) {
                goto error;
        }
        ret = lttng_poll_add(poll_set,
@@ -295,7 +287,6 @@ int init_poll_set(struct lttng_poll_event *poll_set,
                goto error;
        }
 
-end:
        return ret;
 error:
        lttng_poll_clean(poll_set);
@@ -326,11 +317,6 @@ int init_thread_state(struct rotation_thread_handle *handle,
                goto end;
        }
 
-       /*
-        * We wait until the notification thread is ready to create the
-        * notification channel and add it to the poll_set.
-        */
-       sem_wait(handle->notification_thread_ready);
        rotate_notification_channel = lttng_notification_channel_create(
                        lttng_session_daemon_notification_endpoint);
        if (!rotate_notification_channel) {
@@ -394,7 +380,7 @@ int check_session_rotation_pending_local_on_consumer(
 static
 int check_session_rotation_pending_local(struct ltt_session *session)
 {
-       int ret;
+       int ret = 0;
        struct consumer_socket *socket;
        struct cds_lfht_iter iter;
        bool rotation_completed = true;
@@ -442,7 +428,12 @@ end:
                session->rotation_pending_local = false;
        }
        if (ret) {
-               session->rotation_state = LTTNG_ROTATION_STATE_ERROR;
+               ret = session_reset_rotation_state(session,
+                               LTTNG_ROTATION_STATE_ERROR);
+               if (ret) {
+                       ERR("Failed to reset rotation state of session \"%s\"",
+                                       session->name);
+               }
        }
        return 0;
 }
@@ -502,14 +493,19 @@ int check_session_rotation_pending_relay(struct ltt_session *session)
                ERR("[rotation-thread] Encountered an error when checking if rotation of trace archive %" PRIu64 " of session \"%s\" is pending on the relay",
                                session->current_archive_id - 1,
                                session->name);
-               session->rotation_state = LTTNG_ROTATION_STATE_ERROR;
+               ret = session_reset_rotation_state(session,
+                               LTTNG_ROTATION_STATE_ERROR);
+               if (ret) {
+                       ERR("Failed to reset rotation state of session \"%s\"",
+                                       session->name);
+               }
                rotation_completed = false;
        }
 
        rcu_read_unlock();
 
        if (rotation_completed) {
-               DBG("[rotation-thread] Totation of trace archive %" PRIu64 " of session \"%s\" is complete on the relay",
+               DBG("[rotation-thread] Rotation of trace archive %" PRIu64 " of session \"%s\" is complete on the relay",
                                session->current_archive_id - 1,
                                session->name);
                session->rotation_pending_relay = false;
@@ -531,6 +527,19 @@ int check_session_rotation_pending(struct ltt_session *session,
        DBG("[rotation-thread] Checking for pending rotation on session \"%s\", trace archive %" PRIu64,
                        session->name, session->current_archive_id - 1);
 
+       /*
+        * The rotation-pending check timer of a session is launched in
+        * one-shot mode. If the rotation is incomplete, the rotation
+        * thread will re-enable the pending-check timer.
+        *
+        * The timer thread can't stop the timer itself since it is involved
+        * in the check for the timer's quiescence.
+        */
+       ret = timer_session_rotation_pending_check_stop(session);
+       if (ret) {
+               goto end;
+       }
+
        if (session->rotation_pending_local) {
                /* Updates session->rotation_pending_local as needed. */
                ret = check_session_rotation_pending_local(session);
@@ -566,7 +575,12 @@ int check_session_rotation_pending(struct ltt_session *session,
        /* Rename the completed trace archive's location. */
        now = time(NULL);
        if (now == (time_t) -1) {
-               session->rotation_state = LTTNG_ROTATION_STATE_ERROR;
+               ret = session_reset_rotation_state(session,
+                               LTTNG_ROTATION_STATE_ERROR);
+               if (ret) {
+                       ERR("Failed to reset rotation state of session \"%s\"",
+                                       session->name);
+               }
                ret = LTTNG_ERR_UNK;
                goto end;
        }
@@ -598,6 +612,44 @@ int check_session_rotation_pending(struct ltt_session *session,
                                session->name);
        }
 
+       if (!session->active) {
+               /*
+                * A stop command was issued during the rotation, it is
+                * up to the rotation completion check to perform the
+                * renaming of the last chunk that was produced.
+                */
+               ret = notification_thread_command_session_rotation_ongoing(
+                               notification_thread_handle,
+                               session->name,
+                               session->uid,
+                               session->gid,
+                               session->current_archive_id);
+               if (ret != LTTNG_OK) {
+                       ERR("[rotation-thread] Failed to notify notification thread of completed rotation for session %s",
+                                       session->name);
+               }
+
+               ret = rename_active_chunk(session);
+               if (ret < 0) {
+                       ERR("[rotation-thread] Failed to rename active rotation chunk");
+                       goto end;
+               }
+
+               /* Ownership of location is transferred. */
+               location = session_get_trace_archive_location(session);
+               ret = notification_thread_command_session_rotation_completed(
+                               notification_thread_handle,
+                               session->name,
+                               session->uid,
+                               session->gid,
+                               session->current_archive_id,
+                               location);
+               if (ret != LTTNG_OK) {
+                       ERR("[rotation-thread] Failed to notify notification thread of completed rotation for session %s",
+                                       session->name);
+               }
+       }
+
        ret = 0;
 end:
        if (session->rotation_state == LTTNG_ROTATION_STATE_ONGOING) {
@@ -615,7 +667,7 @@ end:
        return ret;
 }
 
-/* Call with the session lock held. */
+/* Call with the session and session_list locks held. */
 static
 int launch_session_rotation(struct ltt_session *session)
 {
@@ -664,7 +716,6 @@ int handle_job_queue(struct rotation_thread_handle *handle,
 {
        int ret = 0;
        int fd = lttng_pipe_get_readfd(queue->event_pipe);
-       struct ltt_session *session;
        char buf;
 
        ret = lttng_read(fd, &buf, 1);
@@ -675,6 +726,7 @@ int handle_job_queue(struct rotation_thread_handle *handle,
        }
 
        for (;;) {
+               struct ltt_session *session;
                struct rotation_thread_job *job;
 
                /* Take the queue lock only to pop an element from the list. */
@@ -689,10 +741,10 @@ int handle_job_queue(struct rotation_thread_handle *handle,
                pthread_mutex_unlock(&queue->lock);
 
                session_lock_list();
-               session = session_find_by_id(job->session_id);
+               session = job->session;
                if (!session) {
-                       DBG("[rotation-thread] Session %" PRIu64 " not found",
-                                       job->session_id);
+                       DBG("[rotation-thread] Session \"%s\" not found",
+                                       session->name);
                        /*
                         * This is a non-fatal error, and we cannot report it to
                         * the user (timer), so just print the error and
@@ -705,14 +757,16 @@ int handle_job_queue(struct rotation_thread_handle *handle,
                         */
                        session_unlock_list();
                        free(job);
+                       session_put(session);
                        continue;
                }
 
                session_lock(session);
-               session_unlock_list();
-
                ret = run_job(job, session, handle->notification_thread_handle);
                session_unlock(session);
+               /* Release reference held by the job. */
+               session_put(session);
+               session_unlock_list();
                free(job);
                if (ret) {
                        goto end;
@@ -777,7 +831,7 @@ int handle_condition(const struct lttng_condition *condition,
        ret = unsubscribe_session_consumed_size_rotation(session,
                        notification_thread_handle);
        if (ret) {
-               goto end;
+               goto end_unlock;
        }
 
        ret = cmd_rotate_session(session, NULL);
@@ -800,6 +854,7 @@ int handle_condition(const struct lttng_condition *condition,
 
 end_unlock:
        session_unlock(session);
+       session_put(session);
 end:
        return ret;
 }
@@ -819,7 +874,7 @@ int handle_notification_channel(int fd,
        status = lttng_notification_channel_has_pending_notification(
                        rotate_notification_channel, &notification_pending);
        if (status != LTTNG_NOTIFICATION_CHANNEL_STATUS_OK) {
-               ERR("[rotation-thread ]Error occured while checking for pending notification");
+               ERR("[rotation-thread ]Error occurred while checking for pending notification");
                ret = -1;
                goto end;
        }
@@ -876,12 +931,15 @@ void *thread_rotation(void *data)
                goto end;
        }
 
+       rcu_register_thread();
+       rcu_thread_online();
+
        health_register(health_sessiond, HEALTH_SESSIOND_TYPE_ROTATION);
        health_code_update();
 
        ret = init_thread_state(handle, &thread);
        if (ret) {
-               goto end;
+               goto error;
        }
 
        /* Ready to handle client connections. */
@@ -919,7 +977,7 @@ void *thread_rotation(void *data)
                                goto error;
                        }
 
-                       if (fd == handle->quit_pipe) {
+                       if (sessiond_check_thread_quit_pipe(fd, revents)) {
                                DBG("[rotation-thread] Quit pipe activity");
                                /* TODO flush the queue. */
                                goto exit;
@@ -934,7 +992,7 @@ void *thread_rotation(void *data)
                                ret = handle_notification_channel(fd, handle,
                                                &thread);
                                if (ret) {
-                                       ERR("[rotation-thread] Error occured while handling activity on notification channel socket");
+                                       ERR("[rotation-thread] Error occurred while handling activity on notification channel socket");
                                        goto error;
                                }
                        }
@@ -945,6 +1003,8 @@ error:
        DBG("[rotation-thread] Exit");
        fini_thread_state(&thread);
        health_unregister(health_sessiond);
+       rcu_thread_offline();
+       rcu_unregister_thread();
 end:
        return NULL;
 }
This page took 0.029596 seconds and 4 git commands to generate.