Move health into its own common/ static library
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Sun, 15 Sep 2013 22:03:24 +0000 (17:03 -0500)
committerMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Wed, 9 Oct 2013 13:10:12 +0000 (09:10 -0400)
- Introduce lttng/health-internal.h (not installed)

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
configure.ac
include/Makefile.am
include/lttng/health-internal.h [new file with mode: 0644]
src/bin/lttng-sessiond/Makefile.am
src/bin/lttng-sessiond/health-sessiond.h
src/bin/lttng-sessiond/health.c [deleted file]
src/bin/lttng-sessiond/health.h [deleted file]
src/bin/lttng-sessiond/main.c
src/common/Makefile.am
src/common/health/Makefile.am [new file with mode: 0644]
src/common/health/health.c [new file with mode: 0644]

index eee05aacd2a7c1df5fd9f45cc3bf877ba6fd31fb..2b4a340f5e646dfdd4fc1633dd022aa05922ba51 100644 (file)
@@ -354,6 +354,7 @@ AC_CONFIG_FILES([
        src/common/relayd/Makefile
        src/common/testpoint/Makefile
        src/common/index/Makefile
+       src/common/health/Makefile
        src/lib/Makefile
        src/lib/lttng-ctl/Makefile
        src/lib/lttng-ctl/filter/Makefile
index f3413e6dfa376bed65bdfea3db1a22eb0dabf574..15479d4a7163283b0ffd617a7004b5a8179b56c2 100644 (file)
@@ -1,3 +1,8 @@
-lttnginclude_HEADERS = lttng/lttng.h lttng/lttng-error.h lttng/snapshot.h
+lttnginclude_HEADERS = \
+       lttng/lttng.h \
+       lttng/lttng-error.h \
+       lttng/snapshot.h
 
-noinst_HEADERS = lttng/snapshot-internal.h
+noinst_HEADERS = \
+       lttng/snapshot-internal.h \
+       lttng/health-internal.h
diff --git a/include/lttng/health-internal.h b/include/lttng/health-internal.h
new file mode 100644 (file)
index 0000000..764e998
--- /dev/null
@@ -0,0 +1,116 @@
+#ifndef HEALTH_INTERNAL_H
+#define HEALTH_INTERNAL_H
+
+/*
+ * Copyright (C) 2012 - David Goulet <dgoulet@efficios.com>
+ * Copyright (C) 2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <assert.h>
+#include <time.h>
+#include <pthread.h>
+#include <urcu/tls-compat.h>
+#include <urcu/uatomic.h>
+#include <urcu/list.h>
+
+/*
+ * These are the values added to the current state counter depending on
+ * whether the thread is waiting on a poll() or running code.
+ */
+#define HEALTH_POLL_VALUE      (1UL << 0)
+#define HEALTH_CODE_VALUE      (1UL << 1)
+
+#define HEALTH_IS_IN_POLL(x)   ((x) & HEALTH_POLL_VALUE)
+
+struct health_app;
+
+enum health_flags {
+       HEALTH_ERROR                     = (1U << 0),   /* set by health_error() */
+};
+
+struct health_state {
+       /*
+        * last counter and last_time are only read and updated by the health_check
+        * thread (single updater). They hold the value of "current" and the
+        * time sampled at the previous check; a zeroed last_time means that
+        * no sample has been taken yet.
+        */
+       unsigned long last;
+       struct timespec last_time;
+
+       /*
+        * current and flags are updated by multiple threads concurrently.
+        * Bit 0 of current (HEALTH_POLL_VALUE) is set between
+        * health_poll_entry() and health_poll_exit().
+        */
+       unsigned long current;          /* progress counter, updated atomically */
+       enum health_flags flags;        /* other flags, updated atomically */
+       int type;                       /* Indicates the nature of the thread. */
+       /* Node of the global TLS state list. */
+       struct cds_list_head node;
+};
+
+/* Declare TLS health state. */
+extern DECLARE_URCU_TLS(struct health_state, health_state);
+
+/*
+ * Mark the calling thread as entering a blocking poll(): add HEALTH_POLL_VALUE
+ * (1) to the current counter, making it odd. The counter must be even (code
+ * execution state) on entry; the assert() below fails otherwise.
+ */
+static inline void health_poll_entry(void)
+{
+       /* Code MUST be in code execution state which is an even number. */
+       assert(!(uatomic_read(&URCU_TLS(health_state).current)
+                               & HEALTH_POLL_VALUE));
+
+       uatomic_add(&URCU_TLS(health_state).current, HEALTH_POLL_VALUE);
+}
+
+/*
+ * Mark the calling thread as leaving a poll() or other blocking call: add
+ * HEALTH_POLL_VALUE (1) to the current counter, making it even again. The
+ * counter must be odd (poll state) on entry; the assert() fails otherwise.
+ */
+static inline void health_poll_exit(void)
+{
+       /* Code MUST be in poll execution state which is an odd number. */
+       assert(uatomic_read(&URCU_TLS(health_state).current)
+                               & HEALTH_POLL_VALUE);
+
+       uatomic_add(&URCU_TLS(health_state).current, HEALTH_POLL_VALUE);
+}
+
+/*
+ * Add HEALTH_CODE_VALUE (2) to the current counter to indicate that the
+ * calling thread is making progress; the poll bit (bit 0) is left untouched.
+ */
+static inline void health_code_update(void)
+{
+       uatomic_add(&URCU_TLS(health_state).current, HEALTH_CODE_VALUE);
+}
+
+/*
+ * Set the HEALTH_ERROR flag in the calling thread's TLS health state.
+ */
+static inline void health_error(void)
+{
+       uatomic_or(&URCU_TLS(health_state).flags, HEALTH_ERROR);
+}
+
+struct health_app *health_app_create(int nr_types);
+void health_app_destroy(struct health_app *ha);
+int health_check_state(struct health_app *ha, int type);
+void health_register(struct health_app *ha, int type);
+void health_unregister(struct health_app *ha);
+
+#endif /* HEALTH_INTERNAL_H */
index 0fab90f335bc4bd472c8f31b2edf549424d0bb6a..3d96907deac377f4e70ed64c5037ec6e9883f3b0 100644 (file)
@@ -22,7 +22,7 @@ lttng_sessiond_SOURCES = utils.c utils.h \
                        fd-limit.c fd-limit.h \
                        kernel-consumer.c kernel-consumer.h \
                        consumer.h \
-                       health.c health.h health-sessiond.h \
+                       health-sessiond.h \
                        cmd.c cmd.h \
                        buffer-registry.c buffer-registry.h \
                        testpoint.h ht-cleanup.c \
@@ -46,7 +46,8 @@ lttng_sessiond_LDADD = -lrt -lurcu-common -lurcu \
                $(top_builddir)/src/common/libcommon.la \
                $(top_builddir)/src/common/compat/libcompat.la \
                $(top_builddir)/src/common/relayd/librelayd.la \
-               $(top_builddir)/src/common/testpoint/libtestpoint.la
+               $(top_builddir)/src/common/testpoint/libtestpoint.la \
+               $(top_builddir)/src/common/health/libhealth.la
 
 if HAVE_LIBLTTNG_UST_CTL
 lttng_sessiond_LDADD += -llttng-ust-ctl
index 18f922728ed63c7e9b55f89d464904318c176c30..49f9e0b9ca2f0133124403fbfea81ed2186967be 100644 (file)
@@ -18,7 +18,7 @@
  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-#include "health.h"
+#include <lttng/health-internal.h>
 
 enum health_type {
        HEALTH_TYPE_CMD                 = 0,
diff --git a/src/bin/lttng-sessiond/health.c b/src/bin/lttng-sessiond/health.c
deleted file mode 100644 (file)
index 7e1d473..0000000
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Copyright (C) 2012 - David Goulet <dgoulet@efficios.com>
- * Copyright (C) 2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License, version 2 only, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 51
- * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#define _GNU_SOURCE
-#include <assert.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-
-#include <common/defaults.h>
-#include <common/error.h>
-#include <common/macros.h>
-#include <common/sessiond-comm/inet.h>
-
-#include "health.h"
-
-/*
- * An application-specific error state for unregistered thread keeps
- * track of thread errors. A thread reporting a health error, normally
- * unregisters and quits. This makes the TLS health state not available
- * to the health_check_state() call so on unregister we update this
- * global error array so we can keep track of which thread was on error
- * if the TLS health state has been removed.
- */
-struct health_app {
-       /* List of health state, for each application thread */
-       struct cds_list_head list;
-       /*
-        * This lock ensures that TLS memory used for the node and its
-        * container structure don't get reclaimed after the TLS owner
-        * thread exits until we have finished using it.
-        */
-       pthread_mutex_t lock;
-       int nr_types;
-       struct timespec time_delta;
-       /* Health flags containing thread type error state */
-       enum health_flags *flags;
-};
-
-/* Define TLS health state. */
-DEFINE_URCU_TLS(struct health_state, health_state);
-
-struct health_app *health_app_create(int nr_types)
-{
-       struct health_app *ha;
-
-       ha = zmalloc(sizeof(*ha));
-       if (!ha) {
-               return NULL;
-       }
-       ha->flags = zmalloc(sizeof(*ha->flags));
-       if (!ha->flags) {
-               goto error_flags;
-       }
-       CDS_INIT_LIST_HEAD(&ha->list);
-       pthread_mutex_init(&ha->lock, NULL);
-       ha->nr_types = nr_types;
-       ha->time_delta.tv_sec = DEFAULT_HEALTH_CHECK_DELTA_S;
-       ha->time_delta.tv_nsec = DEFAULT_HEALTH_CHECK_DELTA_NS;
-       return ha;
-
-error_flags:
-       free(ha);
-       return NULL;
-}
-
-void health_app_destroy(struct health_app *ha)
-{
-       free(ha->flags);
-       free(ha);
-}
-
-/*
- * Lock health state global list mutex.
- */
-static void state_lock(struct health_app *ha)
-{
-       pthread_mutex_lock(&ha->lock);
-}
-
-/*
- * Unlock health state global list mutex.
- */
-static void state_unlock(struct health_app *ha)
-{
-       pthread_mutex_unlock(&ha->lock);
-}
-
-/*
- * Set time difference in res from time_a and time_b.
- */
-static void time_diff(const struct timespec *time_a,
-               const struct timespec *time_b, struct timespec *res)
-{
-       if (time_a->tv_nsec - time_b->tv_nsec < 0) {
-               res->tv_sec = time_a->tv_sec - time_b->tv_sec - 1;
-               res->tv_nsec = 1000000000L + time_a->tv_sec - time_b->tv_sec;
-       } else {
-               res->tv_sec = time_a->tv_sec - time_b->tv_sec;
-               res->tv_nsec = time_a->tv_nsec - time_b->tv_nsec;
-       }
-}
-
-/*
- * Return true if time_a - time_b > diff, else false.
- */
-static int time_diff_gt(const struct timespec *time_a,
-               const struct timespec *time_b, const struct timespec *diff)
-{
-       struct timespec res;
-
-       time_diff(time_a, time_b, &res);
-       time_diff(&res, diff, &res);
-
-       if (res.tv_sec > 0) {
-               return 1;
-       } else if (res.tv_sec == 0 && res.tv_nsec > 0) {
-               return 1;
-       }
-
-       return 0;
-}
-
-/*
- * Validate health state. Checks for the error flag or health conditions.
- *
- * Return 0 if health is bad or else 1.
- */
-static int validate_state(struct health_app *ha, struct health_state *state)
-{
-       int retval = 1, ret;
-       unsigned long current, last;
-       struct timespec current_time;
-
-       assert(state);
-
-       last = state->last;
-       current = uatomic_read(&state->current);
-
-       ret = clock_gettime(CLOCK_MONOTONIC, &current_time);
-       if (ret < 0) {
-               PERROR("Error reading time\n");
-               /* error */
-               retval = 0;
-               goto end;
-       }
-
-       /*
-        * Thread is in bad health if flag HEALTH_ERROR is set. It is also in bad
-        * health if, after the delta delay has passed, its the progress counter
-        * has not moved and it has NOT been waiting for a poll() call.
-        */
-       if (uatomic_read(&state->flags) & HEALTH_ERROR) {
-               retval = 0;
-               goto end;
-       }
-
-       /*
-        * Initial condition need to update the last counter and sample time, but
-        * should not check health in this initial case, because we don't know how
-        * much time has passed.
-        */
-       if (state->last_time.tv_sec == 0 && state->last_time.tv_nsec == 0) {
-               /* update last counter and last sample time */
-               state->last = current;
-               memcpy(&state->last_time, &current_time, sizeof(current_time));
-       } else {
-               if (time_diff_gt(&current_time, &state->last_time,
-                               &ha->time_delta)) {
-                       if (current == last && !HEALTH_IS_IN_POLL(current)) {
-                               /* error */
-                               retval = 0;
-                       }
-                       /* update last counter and last sample time */
-                       state->last = current;
-                       memcpy(&state->last_time, &current_time, sizeof(current_time));
-
-                       /* On error, stop right now and notify caller. */
-                       if (retval == 0) {
-                               goto end;
-                       }
-               }
-       }
-
-end:
-       DBG("Health state current %lu, last %lu, ret %d",
-                       current, last, ret);
-       return retval;
-}
-
-/*
- * Check health of a specific health type. Note that if a thread has not yet
- * initialize its health subsystem or has quit, it's considered in a good
- * state.
- *
- * Return 0 if health is bad or else 1.
- */
-int health_check_state(struct health_app *ha, int type)
-{
-       int retval = 1;
-       struct health_state *state;
-
-       assert(type < ha->nr_types);
-
-       state_lock(ha);
-
-       cds_list_for_each_entry(state, &ha->list, node) {
-               int ret;
-
-               if (state->type != type) {
-                       continue;
-               }
-
-               ret = validate_state(ha, state);
-               if (!ret) {
-                       retval = 0;
-                       goto end;
-               }
-       }
-
-       /* Check the global state since some state might not be visible anymore. */
-       if (ha->flags[type] & HEALTH_ERROR) {
-               retval = 0;
-       }
-
-end:
-       state_unlock(ha);
-
-       DBG("Health check for type %d is %s", (int) type,
-                       (retval == 0) ? "BAD" : "GOOD");
-       return retval;
-}
-
-/*
- * Init health state.
- */
-void health_register(struct health_app *ha, int type)
-{
-       assert(type < ha->nr_types);
-
-       /* Init TLS state. */
-       uatomic_set(&URCU_TLS(health_state).last, 0);
-       uatomic_set(&URCU_TLS(health_state).last_time.tv_sec, 0);
-       uatomic_set(&URCU_TLS(health_state).last_time.tv_nsec, 0);
-       uatomic_set(&URCU_TLS(health_state).current, 0);
-       uatomic_set(&URCU_TLS(health_state).flags, 0);
-       uatomic_set(&URCU_TLS(health_state).type, type);
-
-       /* Add it to the global TLS state list. */
-       state_lock(ha);
-       cds_list_add(&URCU_TLS(health_state).node, &ha->list);
-       state_unlock(ha);
-}
-
-/*
- * Remove node from global list.
- */
-void health_unregister(struct health_app *ha)
-{
-       state_lock(ha);
-       /*
-        * On error, set the global_error_state since we are about to remove
-        * the node from the global list.
-        */
-       if (uatomic_read(&URCU_TLS(health_state).flags) & HEALTH_ERROR) {
-               uatomic_set(&ha->flags[URCU_TLS(health_state).type],
-                               HEALTH_ERROR);
-       }
-       cds_list_del(&URCU_TLS(health_state).node);
-       state_unlock(ha);
-}
-
-/*
- * Initiliazie health check subsytem. This should be called before any health
- * register occurs.
- */
-void health_init(struct health_app *ha)
-{
-       /*
-        * Get the maximum value between the default delta value and the TCP
-        * timeout with a safety net of the default health check delta.
-        */
-       ha->time_delta.tv_sec = max_t(unsigned long,
-                       lttcomm_inet_tcp_timeout + DEFAULT_HEALTH_CHECK_DELTA_S,
-                       ha->time_delta.tv_sec);
-       DBG("Health check time delta in seconds set to %lu",
-               ha->time_delta.tv_sec);
-}
diff --git a/src/bin/lttng-sessiond/health.h b/src/bin/lttng-sessiond/health.h
deleted file mode 100644 (file)
index 82cfc87..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (C) 2012 - David Goulet <dgoulet@efficios.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License, version 2 only, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 51
- * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef _HEALTH_H
-#define _HEALTH_H
-
-#include <assert.h>
-#include <time.h>
-#include <pthread.h>
-#include <urcu/tls-compat.h>
-#include <urcu/uatomic.h>
-#include <urcu/list.h>
-
-/*
- * These are the value added to the current state depending of the position in
- * the thread where is either waiting on a poll() or running in the code.
- */
-#define HEALTH_POLL_VALUE      (1UL << 0)
-#define HEALTH_CODE_VALUE      (1UL << 1)
-
-#define HEALTH_IS_IN_POLL(x)   ((x) & HEALTH_POLL_VALUE)
-
-struct health_app;
-
-enum health_flags {
-       HEALTH_ERROR                     = (1U << 0),
-};
-
-struct health_state {
-       /*
-        * last counter and last_time are only read and updated by the health_check
-        * thread (single updater).
-        */
-       unsigned long last;
-       struct timespec last_time;
-
-       /*
-        * current and flags are updated by multiple threads concurrently.
-        */
-       unsigned long current;          /* progress counter, updated atomically */
-       enum health_flags flags;        /* other flags, updated atomically */
-       int type;                       /* Indicates the nature of the thread. */
-       /* Node of the global TLS state list. */
-       struct cds_list_head node;
-};
-
-/* Declare TLS health state. */
-extern DECLARE_URCU_TLS(struct health_state, health_state);
-
-/*
- * Update current counter by 1 to indicate that the thread entered or left a
- * blocking state caused by a poll(). If the counter's value is not an even
- * number (meaning a code execution flow), an assert() is raised.
- */
-static inline void health_poll_entry(void)
-{
-       /* Code MUST be in code execution state which is an even number. */
-       assert(!(uatomic_read(&URCU_TLS(health_state).current)
-                               & HEALTH_POLL_VALUE));
-
-       uatomic_add(&URCU_TLS(health_state).current, HEALTH_POLL_VALUE);
-}
-
-/*
- * Update current counter by 1 indicating the exit of a poll or blocking call.
- * If the counter's value is not an odd number (a poll execution), an assert()
- * is raised.
- */
-static inline void health_poll_exit(void)
-{
-       /* Code MUST be in poll execution state which is an odd number. */
-       assert(uatomic_read(&URCU_TLS(health_state).current)
-                               & HEALTH_POLL_VALUE);
-
-       uatomic_add(&URCU_TLS(health_state).current, HEALTH_POLL_VALUE);
-}
-
-/*
- * Update current counter by 2 indicates progress in execution of a
- * thread.
- */
-static inline void health_code_update(void)
-{
-       uatomic_add(&URCU_TLS(health_state).current, HEALTH_CODE_VALUE);
-}
-
-/*
- * Set health "error" flag.
- */
-static inline void health_error(void)
-{
-       uatomic_or(&URCU_TLS(health_state).flags, HEALTH_ERROR);
-}
-
-struct health_app *health_app_create(int nr_types);
-void health_app_destroy(struct health_app *ha);
-int health_check_state(struct health_app *ha, int type);
-void health_register(struct health_app *ha, int type);
-void health_unregister(struct health_app *ha);
-void health_init(struct health_app *ha);
-
-#endif /* _HEALTH_H */
index a54c0bf8df0a7c76d1bd39f4a4b99273501cb214..550aa1b83f68d65f01e936d1e81fb5308b0b2b4e 100644 (file)
@@ -4727,7 +4727,6 @@ int main(int argc, char **argv)
                PERROR("health_app_create error");
                goto exit_health_sessiond_cleanup;
        }
-       health_init(health_sessiond);
 
        /* Create thread to manage the client socket */
        ret = pthread_create(&ht_cleanup_thread, NULL,
index 8454b5a51562ca30041c8f9367906c73500dc92b..0c02d533c021105cd73c6d1e105f77b251955fba 100644 (file)
@@ -1,6 +1,6 @@
 AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src
 
-SUBDIRS = compat hashtable kernel-ctl sessiond-comm relayd \
+SUBDIRS = compat health hashtable kernel-ctl sessiond-comm relayd \
                  kernel-consumer ust-consumer testpoint index
 
 AM_CFLAGS = -fno-strict-aliasing
diff --git a/src/common/health/Makefile.am b/src/common/health/Makefile.am
new file mode 100644 (file)
index 0000000..d82f9de
--- /dev/null
@@ -0,0 +1,5 @@
+AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src
+
+noinst_LTLIBRARIES = libhealth.la
+
+libhealth_la_SOURCES = health.c
diff --git a/src/common/health/health.c b/src/common/health/health.c
new file mode 100644 (file)
index 0000000..d2414ae
--- /dev/null
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2012 - David Goulet <dgoulet@efficios.com>
+ * Copyright (C) 2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License, version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <common/defaults.h>
+#include <common/error.h>
+#include <common/macros.h>
+#include <common/sessiond-comm/inet.h>
+
+#include <lttng/health-internal.h>
+
+/*
+ * Per-application health bookkeeping.
+ *
+ * Besides the list of registered thread states, it keeps an
+ * application-specific error state for unregistered threads. A thread
+ * reporting a health error normally unregisters and quits. This makes
+ * its TLS health state unavailable to the health_check_state() call, so
+ * on unregister we update the flags array below, indexed by thread
+ * type, to keep track of which thread type was in error once the TLS
+ * health state has been removed.
+ */
+struct health_app {
+       /* List of health state, for each application thread */
+       struct cds_list_head list;
+       /*
+        * This lock ensures that TLS memory used for the node and its
+        * container structure don't get reclaimed after the TLS owner
+        * thread exits until we have finished using it.
+        */
+       pthread_mutex_t lock;
+       int nr_types;           /* number of thread types; valid types are < nr_types */
+       struct timespec time_delta;     /* delay that must elapse between two progress checks */
+       /* Health flags containing thread type error state */
+       enum health_flags *flags;
+};
+
+/* Define TLS health state. */
+DEFINE_URCU_TLS(struct health_state, health_state);
+
+/*
+ * Initialize health check subsystem: raise the check time delta of ha to at
+ * least the TCP timeout plus the default health check delta.
+ */
+static
+void health_init(struct health_app *ha)
+{
+       /*
+        * Get the maximum value between the default delta value and the TCP
+        * timeout with a safety net of the default health check delta.
+        */
+       ha->time_delta.tv_sec = max_t(unsigned long,
+                       lttcomm_inet_tcp_timeout + DEFAULT_HEALTH_CHECK_DELTA_S,
+                       ha->time_delta.tv_sec);
+       DBG("Health check time delta in seconds set to %lu",
+               ha->time_delta.tv_sec);
+}
+
+/*
+ * Allocate and initialize a health application object.
+ *
+ * nr_types is the number of distinct thread types the application will
+ * register; it sizes the per-type error flags array and must be positive.
+ *
+ * Return the new object, to be released with health_app_destroy(), or NULL
+ * if nr_types is not positive or if an allocation fails.
+ */
+struct health_app *health_app_create(int nr_types)
+{
+       struct health_app *ha;
+
+       if (nr_types <= 0) {
+               return NULL;
+       }
+
+       ha = zmalloc(sizeof(*ha));
+       if (!ha) {
+               return NULL;
+       }
+       /*
+        * One flags slot per thread type: health_check_state() and
+        * health_unregister() index this array by type.
+        */
+       ha->flags = zmalloc(sizeof(*ha->flags) * nr_types);
+       if (!ha->flags) {
+               goto error_flags;
+       }
+       CDS_INIT_LIST_HEAD(&ha->list);
+       pthread_mutex_init(&ha->lock, NULL);
+       ha->nr_types = nr_types;
+       ha->time_delta.tv_sec = DEFAULT_HEALTH_CHECK_DELTA_S;
+       ha->time_delta.tv_nsec = DEFAULT_HEALTH_CHECK_DELTA_NS;
+       health_init(ha);
+       return ha;
+
+error_flags:
+       free(ha);
+       return NULL;
+}
+
+/*
+ * Release a health application object created by health_app_create().
+ * Passing NULL is a no-op. The caller must ensure that no thread is still
+ * using ha (in particular, that its lock is not held) when this is called.
+ */
+void health_app_destroy(struct health_app *ha)
+{
+       if (!ha) {
+               return;
+       }
+       /* Pairs with the pthread_mutex_init() done in health_app_create(). */
+       pthread_mutex_destroy(&ha->lock);
+       free(ha->flags);
+       free(ha);
+}
+
+/*
+ * Lock the mutex protecting the health state list of ha.
+ */
+static void state_lock(struct health_app *ha)
+{
+       pthread_mutex_lock(&ha->lock);
+}
+
+/*
+ * Unlock the mutex protecting the health state list of ha.
+ */
+static void state_unlock(struct health_app *ha)
+{
+       pthread_mutex_unlock(&ha->lock);
+}
+
+/*
+ * Set time difference in res from time_a and time_b (time_a - time_b).
+ *
+ * When the nanosecond difference is negative, one second is borrowed so
+ * that, for normalized inputs, res->tv_nsec ends up in [0, 1000000000).
+ *
+ * res may alias time_a or time_b (time_diff_gt() relies on this): the
+ * result is computed into locals before being stored.
+ */
+static void time_diff(const struct timespec *time_a,
+               const struct timespec *time_b, struct timespec *res)
+{
+       time_t sec = time_a->tv_sec - time_b->tv_sec;
+       long nsec = time_a->tv_nsec - time_b->tv_nsec;
+
+       if (nsec < 0) {
+               /* Borrow one second to keep the nanosecond part positive. */
+               sec -= 1;
+               nsec += 1000000000L;
+       }
+       res->tv_sec = sec;
+       res->tv_nsec = nsec;
+}
+
+/*
+ * Return true if time_a - time_b > diff, else false.
+ *
+ * The comparison is done by computing (time_a - time_b) - diff with
+ * time_diff() and checking whether the result is strictly positive.
+ */
+static int time_diff_gt(const struct timespec *time_a,
+               const struct timespec *time_b, const struct timespec *diff)
+{
+       struct timespec res;
+
+       time_diff(time_a, time_b, &res);
+       time_diff(&res, diff, &res);    /* res is both an input and the output */
+
+       if (res.tv_sec > 0) {
+               return 1;
+       } else if (res.tv_sec == 0 && res.tv_nsec > 0) {
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Validate health state. Checks for the error flag or health conditions.
+ *
+ * A state is bad when the current time cannot be read, when its
+ * HEALTH_ERROR flag is set, or when more than ha->time_delta has elapsed
+ * since the last sample while the progress counter has not moved and the
+ * thread is not waiting in a poll() call. The last counter and last sample
+ * time of state are refreshed on the first check and each time the delta
+ * has elapsed.
+ *
+ * Return 0 if health is bad or else 1.
+ */
+static int validate_state(struct health_app *ha, struct health_state *state)
+{
+       int retval = 1, ret;
+       unsigned long current, last;
+       struct timespec current_time;
+
+       assert(state);
+
+       last = state->last;
+       current = uatomic_read(&state->current);
+
+       ret = clock_gettime(CLOCK_MONOTONIC, &current_time);
+       if (ret < 0) {
+               PERROR("Error reading time\n");
+               /* error */
+               retval = 0;
+               goto end;
+       }
+
+       /* Thread is in bad health if flag HEALTH_ERROR is set. */
+       if (uatomic_read(&state->flags) & HEALTH_ERROR) {
+               retval = 0;
+               goto end;
+       }
+
+       /*
+        * Initial condition need to update the last counter and sample time, but
+        * should not check health in this initial case, because we don't know how
+        * much time has passed.
+        */
+       if (state->last_time.tv_sec == 0 && state->last_time.tv_nsec == 0) {
+               /* update last counter and last sample time */
+               state->last = current;
+               state->last_time = current_time;
+               goto end;
+       }
+
+       /*
+        * Thread is also in bad health if, after the delta delay has passed,
+        * its progress counter has not moved and it has NOT been waiting for
+        * a poll() call.
+        */
+       if (time_diff_gt(&current_time, &state->last_time, &ha->time_delta)) {
+               if (current == last && !HEALTH_IS_IN_POLL(current)) {
+                       /* error */
+                       retval = 0;
+               }
+               /* update last counter and last sample time */
+               state->last = current;
+               state->last_time = current_time;
+       }
+
+end:
+       /* Report the verdict (retval), not the clock_gettime() return code. */
+       DBG("Health state current %lu, last %lu, ret %d",
+                       current, last, retval);
+       return retval;
+}
+
+/*
+ * Check health of a specific health type. Note that if a thread has not yet
+ * initialized its health subsystem or has quit without reporting an error,
+ * it's considered in a good state.
+ *
+ * Every registered state of the given type is validated while holding the
+ * ha lock; if all of them are good, the per-type flags recorded by
+ * health_unregister() are checked as well. type must be < ha->nr_types.
+ *
+ * Return 0 if health is bad or else 1.
+ */
+int health_check_state(struct health_app *ha, int type)
+{
+       int retval = 1;
+       struct health_state *state;
+
+       assert(type < ha->nr_types);
+
+       state_lock(ha);
+
+       cds_list_for_each_entry(state, &ha->list, node) {
+               int ret;
+
+               if (state->type != type) {
+                       continue;
+               }
+
+               ret = validate_state(ha, state);
+               if (!ret) {
+                       retval = 0;
+                       goto end;
+               }
+       }
+
+       /* Check the global state since some state might not be visible anymore. */
+       if (ha->flags[type] & HEALTH_ERROR) {
+               retval = 0;
+       }
+
+end:
+       state_unlock(ha);
+
+       DBG("Health check for type %d is %s", (int) type,
+                       (retval == 0) ? "BAD" : "GOOD");
+       return retval;
+}
+
+/*
+ * Init health state: reset the calling thread's TLS health state, tag it
+ * with the given type (which must be < ha->nr_types) and add it to the
+ * state list of ha.
+ */
+void health_register(struct health_app *ha, int type)
+{
+       assert(type < ha->nr_types);
+
+       /* Init TLS state. */
+       uatomic_set(&URCU_TLS(health_state).last, 0);
+       uatomic_set(&URCU_TLS(health_state).last_time.tv_sec, 0);
+       uatomic_set(&URCU_TLS(health_state).last_time.tv_nsec, 0);
+       uatomic_set(&URCU_TLS(health_state).current, 0);
+       uatomic_set(&URCU_TLS(health_state).flags, 0);
+       uatomic_set(&URCU_TLS(health_state).type, type);
+
+       /* Add it to the global TLS state list. */
+       state_lock(ha);
+       cds_list_add(&URCU_TLS(health_state).node, &ha->list);
+       state_unlock(ha);
+}
+
+/*
+ * Remove the calling thread's TLS health state node from the list of ha.
+ */
+void health_unregister(struct health_app *ha)
+{
+       state_lock(ha);
+       /*
+        * On error, set the per-type error flag of ha since we are about to
+        * remove the node from the global list.
+        */
+       if (uatomic_read(&URCU_TLS(health_state).flags) & HEALTH_ERROR) {
+               uatomic_set(&ha->flags[URCU_TLS(health_state).type],
+                               HEALTH_ERROR);
+       }
+       cds_list_del(&URCU_TLS(health_state).node);
+       state_unlock(ha);
+}
This page took 0.037495 seconds and 4 git commands to generate.