#include "ust-consumer.h"
#include "utils.h"
#include "fd-limit.h"
-#include "health.h"
+#include "health-sessiond.h"
#include "testpoint.h"
#include "ust-thread.h"
/* Set in main() with the current page size. */
long page_size;
+/* Application health monitoring */
+struct health_app *health_sessiond;
+
static
void setup_consumerd_path(void)
{
DBG("[thread] Thread manage kernel started");
- health_register(HEALTH_TYPE_KERNEL);
+ health_register(health_sessiond, HEALTH_TYPE_KERNEL);
/*
* This first step of the while is to clean this structure which could free
WARN("Kernel thread died unexpectedly. "
"Kernel tracing can continue but CPU hotplug is disabled.");
}
- health_unregister();
+ health_unregister(health_sessiond);
DBG("Kernel thread dying");
return NULL;
}
DBG("[thread] Manage consumer started");
- health_register(HEALTH_TYPE_CONSUMER);
+ health_register(health_sessiond, HEALTH_TYPE_CONSUMER);
health_code_update();
lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
consumer_data->metadata_fd =
lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
- if (consumer_data->cmd_sock < 0 || consumer_data->metadata_fd < 0) {
+ if (consumer_data->cmd_sock < 0
+ || consumer_data->metadata_fd < 0) {
PERROR("consumer connect cmd socket");
/* On error, signal condition and quit. */
signal_consumer_condition(consumer_data, -1);
error:
/*
* We lock here because we are about to close the sockets and some other
- * thread might be using them so wait before we are exclusive which will
- * abort all other consumer command by other threads.
+ * thread might be using them so get exclusive access which will abort all
+ * other consumer commands by other threads.
*/
pthread_mutex_lock(&consumer_data->lock);
unlink(consumer_data->cmd_unix_sock_path);
consumer_data->pid = 0;
pthread_mutex_unlock(&consumer_data->lock);
+
/* Cleanup metadata socket mutex. */
pthread_mutex_destroy(consumer_data->metadata_sock.lock);
free(consumer_data->metadata_sock.lock);
health_error();
ERR("Health error occurred in %s", __func__);
}
- health_unregister();
+ health_unregister(health_sessiond);
DBG("consumer thread cleanup completed");
return NULL;
rcu_register_thread();
rcu_thread_online();
- health_register(HEALTH_TYPE_APP_MANAGE);
+ health_register(health_sessiond, HEALTH_TYPE_APP_MANAGE);
if (testpoint(thread_manage_apps)) {
goto error_testpoint;
health_error();
ERR("Health error occurred in %s", __func__);
}
- health_unregister();
+ health_unregister(health_sessiond);
DBG("Application communication apps thread cleanup complete");
rcu_thread_offline();
rcu_unregister_thread();
* Send a socket to a thread. This is called from the dispatch UST registration
* thread once all sockets are set for the application.
*
+ * The sock value can be invalid, we don't really care, the thread will handle
+ * it and make the necessary cleanup if so.
+ *
* On success, return 0 else a negative value being the errno message of the
* write().
*/
{
int ret;
- /* Sockets MUST be set or else this should not have been called. */
- assert(fd >= 0);
- assert(sock >= 0);
+ /*
+ * It's possible that the FD is concurrently set to -1 (invalid) just
+ * before this function is called because the thread is shutting down.
+ */
+ if (fd < 0) {
+ ret = -EBADF;
+ goto error;
+ }
do {
ret = write(fd, &sock, sizeof(sock));
.count = 0,
};
- health_register(HEALTH_TYPE_APP_REG_DISPATCH);
+ health_register(health_sessiond, HEALTH_TYPE_APP_REG_DISPATCH);
health_code_update();
if (ret < 0) {
rcu_read_unlock();
session_unlock_list();
- /* No notify thread, stop the UST tracing. */
+ /*
+ * No notify thread, stop the UST tracing. However, this is
+ * not an internal error of this thread thus setting
+ * the health error code to a normal exit.
+ */
+ err = 0;
goto error;
}
if (ret < 0) {
rcu_read_unlock();
session_unlock_list();
- /* No apps. thread, stop the UST tracing. */
+ /*
+ * No apps. thread, stop the UST tracing. However, this is
+ * not an internal error of this thread thus setting
+ * the health error code to a normal exit.
+ */
+ err = 0;
goto error;
}
health_error();
ERR("Health error occurred in %s", __func__);
}
- health_unregister();
+ health_unregister(health_sessiond);
return NULL;
}
DBG("[thread] Manage application registration started");
- health_register(HEALTH_TYPE_APP_REG);
+ health_register(health_sessiond, HEALTH_TYPE_APP_REG);
if (testpoint(thread_registration_apps)) {
goto error_testpoint;
error_create_poll:
error_testpoint:
DBG("UST Registration thread cleanup complete");
- health_unregister();
+ health_unregister(health_sessiond);
return NULL;
}
{
int ret;
- ret = health_check_state(HEALTH_TYPE_CONSUMER);
+ ret = health_check_state(health_sessiond, HEALTH_TYPE_CONSUMER);
DBG3("Health consumer check %d", ret);
lus->gid = session->gid;
lus->output_traces = session->output_traces;
lus->snapshot_mode = session->snapshot_mode;
+ lus->live_timer_interval = session->live_timer;
session->ust_session = lus;
/* Copy session output to the newly created UST session */
switch (cmd_ctx->lsm->cmd_type) {
case LTTNG_CREATE_SESSION:
case LTTNG_CREATE_SESSION_SNAPSHOT:
+ case LTTNG_CREATE_SESSION_LIVE:
case LTTNG_DESTROY_SESSION:
case LTTNG_LIST_SESSIONS:
case LTTNG_LIST_DOMAINS:
switch (cmd_ctx->lsm->cmd_type) {
case LTTNG_CREATE_SESSION:
case LTTNG_CREATE_SESSION_SNAPSHOT:
+ case LTTNG_CREATE_SESSION_LIVE:
case LTTNG_CALIBRATE:
case LTTNG_LIST_SESSIONS:
case LTTNG_LIST_TRACEPOINTS:
}
ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris, nb_uri,
- &cmd_ctx->creds);
+ &cmd_ctx->creds, 0);
free(uris);
free(uris);
break;
}
+ case LTTNG_CREATE_SESSION_LIVE:
+ {
+ size_t nb_uri, len;
+ struct lttng_uri *uris = NULL;
+
+ nb_uri = cmd_ctx->lsm->u.uri.size;
+ len = nb_uri * sizeof(struct lttng_uri);
+
+ if (nb_uri > 0) {
+ uris = zmalloc(len);
+ if (uris == NULL) {
+ ret = LTTNG_ERR_FATAL;
+ goto error;
+ }
+
+ /* Receive variable len data */
+ DBG("Waiting for %zu URIs from client ...", nb_uri);
+ ret = lttcomm_recv_unix_sock(sock, uris, len);
+ if (ret <= 0) {
+ DBG("No URIs received from client... continuing");
+ *sock_error = 1;
+ ret = LTTNG_ERR_SESSION_FAIL;
+ free(uris);
+ goto error;
+ }
+
+ if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) {
+ DBG("Creating session with ONE network URI is a bad call");
+ ret = LTTNG_ERR_SESSION_FAIL;
+ free(uris);
+ goto error;
+ }
+ }
+
+ ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris,
+ nb_uri, &cmd_ctx->creds, cmd_ctx->lsm->u.session_live.timer_interval);
+ free(uris);
+ break;
+ }
default:
ret = LTTNG_ERR_UND;
break;
switch (msg.component) {
case LTTNG_HEALTH_CMD:
- reply.ret_code = health_check_state(HEALTH_TYPE_CMD);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_CMD);
break;
case LTTNG_HEALTH_APP_MANAGE:
- reply.ret_code = health_check_state(HEALTH_TYPE_APP_MANAGE);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE);
break;
case LTTNG_HEALTH_APP_REG:
- reply.ret_code = health_check_state(HEALTH_TYPE_APP_REG);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_REG);
break;
case LTTNG_HEALTH_KERNEL:
- reply.ret_code = health_check_state(HEALTH_TYPE_KERNEL);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_KERNEL);
break;
case LTTNG_HEALTH_CONSUMER:
reply.ret_code = check_consumer_health();
break;
case LTTNG_HEALTH_HT_CLEANUP:
- reply.ret_code = health_check_state(HEALTH_TYPE_HT_CLEANUP);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_HT_CLEANUP);
break;
case LTTNG_HEALTH_APP_MANAGE_NOTIFY:
- reply.ret_code = health_check_state(HEALTH_TYPE_APP_MANAGE_NOTIFY);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE_NOTIFY);
break;
case LTTNG_HEALTH_APP_REG_DISPATCH:
- reply.ret_code = health_check_state(HEALTH_TYPE_APP_REG_DISPATCH);
+ reply.ret_code = health_check_state(health_sessiond, HEALTH_TYPE_APP_REG_DISPATCH);
break;
case LTTNG_HEALTH_ALL:
reply.ret_code =
- health_check_state(HEALTH_TYPE_APP_MANAGE) &&
- health_check_state(HEALTH_TYPE_APP_REG) &&
- health_check_state(HEALTH_TYPE_CMD) &&
- health_check_state(HEALTH_TYPE_KERNEL) &&
+ health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE) &&
+ health_check_state(health_sessiond, HEALTH_TYPE_APP_REG) &&
+ health_check_state(health_sessiond, HEALTH_TYPE_CMD) &&
+ health_check_state(health_sessiond, HEALTH_TYPE_KERNEL) &&
check_consumer_health() &&
- health_check_state(HEALTH_TYPE_HT_CLEANUP) &&
- health_check_state(HEALTH_TYPE_APP_MANAGE_NOTIFY) &&
- health_check_state(HEALTH_TYPE_APP_REG_DISPATCH);
+ health_check_state(health_sessiond, HEALTH_TYPE_HT_CLEANUP) &&
+ health_check_state(health_sessiond, HEALTH_TYPE_APP_MANAGE_NOTIFY) &&
+ health_check_state(health_sessiond, HEALTH_TYPE_APP_REG_DISPATCH);
break;
default:
reply.ret_code = LTTNG_ERR_UND;
rcu_register_thread();
- health_register(HEALTH_TYPE_CMD);
+ health_register(health_sessiond, HEALTH_TYPE_CMD);
if (testpoint(thread_manage_clients)) {
goto error_testpoint;
ERR("Health error occurred in %s", __func__);
}
- health_unregister();
+ health_unregister(health_sessiond);
DBG("Client thread dying");
/* Initialize communication library */
lttcomm_init();
+ /* This is to get the TCP timeout value. */
+ lttcomm_inet_init();
+
+ /*
+ * Initialize the health check subsystem. This call should set the
+ * appropriate time values.
+ */
+ health_sessiond = health_app_create(HEALTH_NUM_TYPE);
+ if (!health_sessiond) {
+ PERROR("health_app_create error");
+ goto exit_health_sessiond_cleanup;
+ }
/* Create thread to manage the client socket */
ret = pthread_create(&ht_cleanup_thread, NULL,
ust_thread_manage_notify, (void *) NULL);
if (ret != 0) {
PERROR("pthread_create apps");
- goto exit_apps;
+ goto exit_apps_notify;
}
/* Don't start this thread if kernel tracing is not requested nor root */
}
exit_kernel:
+ ret = pthread_join(apps_notify_thread, &status);
+ if (ret != 0) {
+ PERROR("pthread_join apps notify");
+ goto error; /* join error, exit without cleanup */
+ }
+
+exit_apps_notify:
ret = pthread_join(apps_thread, &status);
if (ret != 0) {
- PERROR("pthread_join");
+ PERROR("pthread_join apps");
goto error; /* join error, exit without cleanup */
}
+
exit_apps:
ret = pthread_join(reg_apps_thread, &status);
if (ret != 0) {
goto error; /* join error, exit without cleanup */
}
exit_ht_cleanup:
+ health_app_destroy(health_sessiond);
+exit_health_sessiond_cleanup:
exit:
/*
* cleanup() is called when no other thread is running.