#include "ust-consumer.h"
#include "utils.h"
#include "fd-limit.h"
+#include "filter.h"
+#include "health.h"
#define CONSUMERD_FILE "lttng-consumerd"
static char client_unix_sock_path[PATH_MAX];
/* global wait shm path for UST */
static char wait_shm_path[PATH_MAX];
+/* Global health check unix path */
+static char health_unix_sock_path[PATH_MAX];
/* Sockets and FDs */
static int client_sock = -1;
static pthread_t client_thread;
static pthread_t kernel_thread;
static pthread_t dispatch_thread;
+static pthread_t health_thread;
/*
* UST registration command queue. This queue is tied with a futex and uses a N
*/
static unsigned int relayd_net_seq_idx;
+/* Used for the health monitoring of the session daemon. See health.h */
+struct health_state health_thread_cmd;
+struct health_state health_thread_app_reg;
+struct health_state health_thread_kernel;
+
static
void setup_consumerd_path(void)
{
DBG("Thread manage kernel started");
+ health_code_update(&health_thread_kernel);
+
ret = create_thread_poll_set(&events, 2);
if (ret < 0) {
goto error_poll_create;
}
while (1) {
+ health_code_update(&health_thread_kernel);
+
if (update_poll_flag == 1) {
/*
* Reset number of fd in the poll set. Always 2 since there is the thread
/* Poll infinite value of time */
restart:
+ health_poll_update(&health_thread_kernel);
ret = lttng_poll_wait(&events, -1);
+ health_poll_update(&health_thread_kernel);
if (ret < 0) {
/*
* Restart interrupted system call.
revents = LTTNG_POLL_GETEV(&events, i);
pollfd = LTTNG_POLL_GETFD(&events, i);
+ health_code_update(&health_thread_kernel);
+
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
if (ret) {
error:
lttng_poll_clean(&events);
error_poll_create:
+ health_reset(&health_thread_kernel);
DBG("Kernel thread dying");
return NULL;
}
DBG("[thread] Manage consumer started");
+ health_code_update(&consumer_data->health);
+
ret = lttcomm_listen_unix_sock(consumer_data->err_sock);
if (ret < 0) {
goto error_listen;
nb_fd = LTTNG_POLL_GETNB(&events);
+ health_code_update(&consumer_data->health);
+
/* Inifinite blocking call, waiting for transmission */
restart:
+ health_poll_update(&consumer_data->health);
ret = lttng_poll_wait(&events, -1);
+ health_poll_update(&consumer_data->health);
if (ret < 0) {
/*
* Restart interrupted system call.
revents = LTTNG_POLL_GETEV(&events, i);
pollfd = LTTNG_POLL_GETFD(&events, i);
+ health_code_update(&consumer_data->health);
+
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
if (ret) {
goto error;
}
+ health_code_update(&consumer_data->health);
+
DBG2("Receiving code from consumer err_sock");
/* Getting status code from kconsumerd */
goto error;
}
+ health_code_update(&consumer_data->health);
+
if (code == CONSUMERD_COMMAND_SOCK_READY) {
consumer_data->cmd_sock =
lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
goto error;
}
+ health_code_update(&consumer_data->health);
+
/* Update number of fd */
nb_fd = LTTNG_POLL_GETNB(&events);
/* Inifinite blocking call, waiting for transmission */
restart_poll:
+ health_poll_update(&consumer_data->health);
ret = lttng_poll_wait(&events, -1);
+ health_poll_update(&consumer_data->health);
if (ret < 0) {
/*
* Restart interrupted system call.
revents = LTTNG_POLL_GETEV(&events, i);
pollfd = LTTNG_POLL_GETFD(&events, i);
+ health_code_update(&consumer_data->health);
+
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
if (ret) {
}
}
+ health_code_update(&consumer_data->health);
+
/* Wait for any kconsumerd error */
ret = lttcomm_recv_unix_sock(sock, &code,
sizeof(enum lttcomm_return_code));
lttng_poll_clean(&events);
error_poll:
error_listen:
+ health_reset(&consumer_data->health);
DBG("consumer thread cleanup completed");
return NULL;
rcu_register_thread();
rcu_thread_online();
+ health_code_update(&health_thread_app_reg);
+
ret = create_thread_poll_set(&events, 2);
if (ret < 0) {
goto error_poll_create;
goto error;
}
+ health_code_update(&health_thread_app_reg);
+
while (1) {
/* Zeroed the events structure */
lttng_poll_reset(&events);
/* Inifinite blocking call, waiting for transmission */
restart:
+ health_poll_update(&health_thread_app_reg);
ret = lttng_poll_wait(&events, -1);
+ health_poll_update(&health_thread_app_reg);
if (ret < 0) {
/*
* Restart interrupted system call.
revents = LTTNG_POLL_GETEV(&events, i);
pollfd = LTTNG_POLL_GETFD(&events, i);
+ health_code_update(&health_thread_app_reg);
+
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
if (ret) {
goto error;
}
+ health_code_update(&health_thread_app_reg);
+
/* Register applicaton to the session daemon */
ret = ust_app_register(&ust_cmd.reg_msg,
ust_cmd.sock);
break;
}
+ health_code_update(&health_thread_app_reg);
+
/*
* Validate UST version compatibility.
*/
update_ust_app(ust_cmd.sock);
}
+ health_code_update(&health_thread_app_reg);
+
ret = ust_app_register_done(ust_cmd.sock);
if (ret < 0) {
/*
ust_cmd.sock);
}
+ health_code_update(&health_thread_app_reg);
+
break;
}
} else {
break;
}
}
+
+ health_code_update(&health_thread_app_reg);
}
}
error:
lttng_poll_clean(&events);
error_poll_create:
+ health_reset(&health_thread_app_reg);
DBG("Application communication apps thread cleanup complete");
rcu_thread_offline();
rcu_unregister_thread();
return ret;
}
+/*
+ * Compute health status of each consumer.
+ */
+static int check_consumer_health(void)
+{
+ int ret;
+
+ ret =
+ health_check_state(&kconsumer_data.health) &
+ health_check_state(&ustconsumer32_data.health) &
+ health_check_state(&ustconsumer64_data.health);
+
+ DBG3("Health consumer check %d", ret);
+
+ return ret;
+}
+
/*
* Check version of the lttng-modules.
*/
tmp[i].loglevel_type = LTTNG_EVENT_LOGLEVEL_SINGLE;
break;
}
+ if (uevent->filter) {
+ tmp[i].filter = 1;
+ }
i++;
}
return ret;
}
+/*
+ * Command LTTNG_SET_FILTER processed by the client thread.
+ */
+static int cmd_set_filter(struct ltt_session *session, int domain,
+ char *channel_name, char *event_name,
+ struct lttng_filter_bytecode *bytecode)
+{
+ int ret;
+
+ switch (domain) {
+ case LTTNG_DOMAIN_KERNEL:
+ ret = LTTCOMM_FATAL;
+ break;
+ case LTTNG_DOMAIN_UST:
+ {
+ struct ltt_ust_session *usess = session->ust_session;
+
+ ret = filter_ust_set(usess, domain, bytecode, event_name, channel_name);
+ if (ret != LTTCOMM_OK) {
+ goto error;
+ }
+ break;
+ }
+#if 0
+ case LTTNG_DOMAIN_UST_EXEC_NAME:
+ case LTTNG_DOMAIN_UST_PID:
+ case LTTNG_DOMAIN_UST_PID_FOLLOW_CHILDREN:
+#endif
+ default:
+ ret = LTTCOMM_UND;
+ goto error;
+ }
+
+ ret = LTTCOMM_OK;
+
+error:
+ return ret;
+
+}
+
/*
* Command LTTNG_ENABLE_EVENT processed by the client thread.
*/
* is set and ready for transmission before returning.
*
* Return any error encountered or 0 for success.
+ *
+ * "sock" is only used for special-case var. len data.
*/
-static int process_client_msg(struct command_ctx *cmd_ctx)
+static int process_client_msg(struct command_ctx *cmd_ctx, int sock,
+ int *sock_error)
{
int ret = LTTCOMM_OK;
int need_tracing_session = 1;
DBG("Processing client command %d", cmd_ctx->lsm->cmd_type);
+ *sock_error = 0;
+
switch (cmd_ctx->lsm->cmd_type) {
case LTTNG_CREATE_SESSION:
case LTTNG_CREATE_SESSION_URI:
cmd_ctx->lsm->u.reg.path);
break;
}
+ case LTTNG_SET_FILTER:
+ {
+ struct lttng_filter_bytecode *bytecode;
+
+ if (cmd_ctx->lsm->u.filter.bytecode_len > 65336) {
+ ret = LTTCOMM_FILTER_INVAL;
+ goto error;
+ }
+ bytecode = zmalloc(cmd_ctx->lsm->u.filter.bytecode_len);
+ if (!bytecode) {
+ ret = LTTCOMM_FILTER_NOMEM;
+ goto error;
+ }
+ /* Receive var. len. data */
+ DBG("Receiving var len data from client ...");
+ ret = lttcomm_recv_unix_sock(sock, bytecode,
+ cmd_ctx->lsm->u.filter.bytecode_len);
+ if (ret <= 0) {
+ DBG("Nothing recv() from client var len data... continuing");
+ *sock_error = 1;
+ ret = LTTCOMM_FILTER_INVAL;
+ goto error;
+ }
+
+ if (bytecode->len + sizeof(*bytecode)
+ != cmd_ctx->lsm->u.filter.bytecode_len) {
+ free(bytecode);
+ ret = LTTCOMM_FILTER_INVAL;
+ goto error;
+ }
+
+ ret = cmd_set_filter(cmd_ctx->session, cmd_ctx->lsm->domain.type,
+ cmd_ctx->lsm->u.filter.channel_name,
+ cmd_ctx->lsm->u.filter.event_name,
+ bytecode);
+ break;
+ }
default:
ret = LTTCOMM_UND;
break;
return ret;
}
+/*
+ * Thread managing health check socket.
+ */
+static void *thread_manage_health(void *data)
+{
+ int sock = -1, new_sock, ret, i, pollfd;
+ uint32_t revents, nb_fd;
+ struct lttng_poll_event events;
+ struct lttcomm_health_msg msg;
+ struct lttcomm_health_data reply;
+
+ DBG("[thread] Manage health check started");
+
+ rcu_register_thread();
+
+ /* Create unix socket */
+ sock = lttcomm_create_unix_sock(health_unix_sock_path);
+ if (sock < 0) {
+ ERR("Unable to create health check Unix socket");
+ ret = -1;
+ goto error;
+ }
+
+ ret = lttcomm_listen_unix_sock(sock);
+ if (ret < 0) {
+ goto error;
+ }
+
+ /*
+ * Pass 2 as size here for the thread quit pipe and client_sock. Nothing
+ * more will be added to this poll set.
+ */
+ ret = create_thread_poll_set(&events, 2);
+ if (ret < 0) {
+ goto error;
+ }
+
+ /* Add the application registration socket */
+ ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLPRI);
+ if (ret < 0) {
+ goto error;
+ }
+
+ while (1) {
+ DBG("Health check ready");
+
+ nb_fd = LTTNG_POLL_GETNB(&events);
+
+ /* Inifinite blocking call, waiting for transmission */
+restart:
+ ret = lttng_poll_wait(&events, -1);
+ if (ret < 0) {
+ /*
+ * Restart interrupted system call.
+ */
+ if (errno == EINTR) {
+ goto restart;
+ }
+ goto error;
+ }
+
+ for (i = 0; i < nb_fd; i++) {
+ /* Fetch once the poll data */
+ revents = LTTNG_POLL_GETEV(&events, i);
+ pollfd = LTTNG_POLL_GETFD(&events, i);
+
+ /* Thread quit pipe has been closed. Killing thread. */
+ ret = check_thread_quit_pipe(pollfd, revents);
+ if (ret) {
+ goto error;
+ }
+
+ /* Event on the registration socket */
+ if (pollfd == sock) {
+ if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
+ ERR("Health socket poll error");
+ goto error;
+ }
+ }
+ }
+
+ new_sock = lttcomm_accept_unix_sock(sock);
+ if (new_sock < 0) {
+ goto error;
+ }
+
+ DBG("Receiving data from client for health...");
+ ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg));
+ if (ret <= 0) {
+ DBG("Nothing recv() from client... continuing");
+ ret = close(new_sock);
+ if (ret) {
+ PERROR("close");
+ }
+ new_sock = -1;
+ continue;
+ }
+
+ rcu_thread_online();
+
+ switch (msg.component) {
+ case LTTNG_HEALTH_CMD:
+ reply.ret_code = health_check_state(&health_thread_cmd);
+ break;
+ case LTTNG_HEALTH_APP_REG:
+ reply.ret_code = health_check_state(&health_thread_app_reg);
+ break;
+ case LTTNG_HEALTH_KERNEL:
+ reply.ret_code = health_check_state(&health_thread_kernel);
+ break;
+ case LTTNG_HEALTH_CONSUMER:
+ reply.ret_code = check_consumer_health();
+ break;
+ case LTTNG_HEALTH_ALL:
+ ret = check_consumer_health();
+
+ reply.ret_code =
+ health_check_state(&health_thread_app_reg) &
+ health_check_state(&health_thread_cmd) &
+ health_check_state(&health_thread_kernel) &
+ ret;
+ break;
+ default:
+ reply.ret_code = LTTCOMM_UND;
+ break;
+ }
+
+ /*
+ * Flip ret value since 0 is a success and 1 indicates a bad health for
+ * the client where in the sessiond it is the opposite. Again, this is
+ * just to make things easier for us poor developer which enjoy a lot
+ * lazyness.
+ */
+ if (reply.ret_code == 0 || reply.ret_code == 1) {
+ reply.ret_code = !reply.ret_code;
+ }
+
+ DBG2("Health check return value %d", reply.ret_code);
+
+ ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
+ if (ret < 0) {
+ ERR("Failed to send health data back to client");
+ }
+
+ /* End of transmission */
+ ret = close(new_sock);
+ if (ret) {
+ PERROR("close");
+ }
+ new_sock = -1;
+ }
+
+error:
+ DBG("Health check thread dying");
+ unlink(health_unix_sock_path);
+ if (sock >= 0) {
+ ret = close(sock);
+ if (ret) {
+ PERROR("close");
+ }
+ }
+ if (new_sock >= 0) {
+ ret = close(new_sock);
+ if (ret) {
+ PERROR("close");
+ }
+ }
+
+ lttng_poll_clean(&events);
+
+ rcu_unregister_thread();
+ return NULL;
+}
+
/*
* This thread manage all clients request using the unix client socket for
* communication.
static void *thread_manage_clients(void *data)
{
int sock = -1, ret, i, pollfd;
+ int sock_error;
uint32_t revents, nb_fd;
struct command_ctx *cmd_ctx = NULL;
struct lttng_poll_event events;
rcu_register_thread();
+ health_code_update(&health_thread_cmd);
+
ret = lttcomm_listen_unix_sock(client_sock);
if (ret < 0) {
goto error;
kill(ppid, SIGUSR1);
}
+ health_code_update(&health_thread_cmd);
+
while (1) {
DBG("Accepting client command ...");
/* Inifinite blocking call, waiting for transmission */
restart:
+ health_poll_update(&health_thread_cmd);
ret = lttng_poll_wait(&events, -1);
+ health_poll_update(&health_thread_cmd);
if (ret < 0) {
/*
* Restart interrupted system call.
revents = LTTNG_POLL_GETEV(&events, i);
pollfd = LTTNG_POLL_GETFD(&events, i);
+ health_code_update(&health_thread_cmd);
+
/* Thread quit pipe has been closed. Killing thread. */
ret = check_thread_quit_pipe(pollfd, revents);
if (ret) {
DBG("Wait for client response");
+ health_code_update(&health_thread_cmd);
+
sock = lttcomm_accept_unix_sock(client_sock);
if (sock < 0) {
goto error;
cmd_ctx->llm = NULL;
cmd_ctx->session = NULL;
+ health_code_update(&health_thread_cmd);
+
/*
* Data is received from the lttng client. The struct
* lttcomm_session_msg (lsm) contains the command and data request of
continue;
}
+ health_code_update(&health_thread_cmd);
+
// TODO: Validate cmd_ctx including sanity check for
// security purpose.
* informations for the client. The command context struct contains
* everything this function may needs.
*/
- ret = process_client_msg(cmd_ctx);
+ ret = process_client_msg(cmd_ctx, sock, &sock_error);
rcu_thread_offline();
if (ret < 0) {
+ if (sock_error) {
+ ret = close(sock);
+ if (ret) {
+ PERROR("close");
+ }
+ sock = -1;
+ }
/*
* TODO: Inform client somehow of the fatal error. At
* this point, ret < 0 means that a zmalloc failed
- * (ENOMEM). Error detected but still accept command.
+ * (ENOMEM). Error detected but still accept
+ * command, unless a socket error has been
+ * detected.
*/
clean_command_ctx(&cmd_ctx);
continue;
}
+ health_code_update(&health_thread_cmd);
+
DBG("Sending response (size: %d, retcode: %s)",
cmd_ctx->lttng_msg_size,
lttng_strerror(-cmd_ctx->llm->ret_code));
sock = -1;
clean_command_ctx(&cmd_ctx);
+
+ health_code_update(&health_thread_cmd);
}
error:
+ health_reset(&health_thread_cmd);
+
DBG("Client thread dying");
unlink(client_unix_sock_path);
if (client_sock >= 0) {
DEFAULT_GLOBAL_APPS_WAIT_SHM_PATH);
}
+ if (strlen(health_unix_sock_path) == 0) {
+ snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
+ DEFAULT_GLOBAL_HEALTH_UNIX_SOCK);
+ }
+
/* Setup kernel consumerd path */
snprintf(kconsumer_data.err_unix_sock_path, PATH_MAX,
DEFAULT_KCONSUMERD_ERR_SOCK_PATH, rundir);
snprintf(wait_shm_path, PATH_MAX,
DEFAULT_HOME_APPS_WAIT_SHM_PATH, geteuid());
}
+
+ /* Set health check Unix path */
+ if (strlen(health_unix_sock_path) == 0) {
+ snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
+ DEFAULT_HOME_HEALTH_UNIX_SOCK, home_path);
+ }
}
/* Set consumer initial state */
*/
uatomic_set(&relayd_net_seq_idx, 1);
+ /* Init all health thread counters. */
+ health_init(&health_thread_cmd);
+ health_init(&health_thread_kernel);
+ health_init(&health_thread_app_reg);
+
+ /*
+ * Init health counters of the consumer thread. We do a quick hack here to
+ * the state of the consumer health is fine even if the thread is not
+ * started. This is simply to ease our life and has no cost what so ever.
+ */
+ health_init(&kconsumer_data.health);
+ health_poll_update(&kconsumer_data.health);
+ health_init(&ustconsumer32_data.health);
+ health_poll_update(&ustconsumer32_data.health);
+ health_init(&ustconsumer64_data.health);
+ health_poll_update(&ustconsumer64_data.health);
+
+ /* Create thread to manage the client socket */
+ ret = pthread_create(&health_thread, NULL,
+ thread_manage_health, (void *) NULL);
+ if (ret != 0) {
+ PERROR("pthread_create health");
+ goto exit_health;
+ }
+
/* Create thread to manage the client socket */
ret = pthread_create(&client_thread, NULL,
thread_manage_clients, (void *) NULL);
}
exit_client:
+exit_health:
exit:
/*
* cleanup() is called when no other thread is running.