Fix: error logged on partial recvmsg() in MSG_DONTWAIT
[lttng-tools.git] / src / common / sessiond-comm / inet.c
index 7aee725602f1441f72093cc42ffdac7e16257117..57ffbe45c28ed1ae68bded38f4713f901a23deb1 100644 (file)
@@ -15,7 +15,7 @@
  * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-#define _GNU_SOURCE
+#define _LGPL_SOURCE
 #include <assert.h>
 #include <limits.h>
 #include <stdio.h>
 #include <sys/types.h>
 #include <unistd.h>
 #include <errno.h>
+#include <fcntl.h>
+#include <common/compat/time.h>
+#include <poll.h>
 
-#include <common/defaults.h>
-#include <common/error.h>
+#include <common/common.h>
+#include <common/time.h>
 
 #include "inet.h"
 
+#define RECONNECT_DELAY        200     /* ms */
+
 /*
  * INET protocol operations.
  */
@@ -44,13 +49,16 @@ static const struct lttcomm_proto_ops inet_ops = {
        .sendmsg = lttcomm_sendmsg_inet_sock,
 };
 
+unsigned long lttcomm_inet_tcp_timeout;
+
 /*
  * Creates an PF_INET socket.
  */
-__attribute__((visibility("hidden")))
+LTTNG_HIDDEN
 int lttcomm_create_inet_sock(struct lttcomm_sock *sock, int type, int proto)
 {
        int val = 1, ret;
+       unsigned long timeout;
 
        /* Create server socket */
        if ((sock->fd = socket(PF_INET, type, proto)) < 0) {
@@ -68,6 +76,17 @@ int lttcomm_create_inet_sock(struct lttcomm_sock *sock, int type, int proto)
                PERROR("setsockopt inet");
                goto error;
        }
+       timeout = lttcomm_get_network_timeout();
+       if (timeout) {
+               ret = lttcomm_setsockopt_rcv_timeout(sock->fd, timeout);
+               if (ret) {
+                       goto error;
+               }
+               ret = lttcomm_setsockopt_snd_timeout(sock->fd, timeout);
+               if (ret) {
+                       goto error;
+               }
+       }
 
        return 0;
 
@@ -78,35 +97,145 @@ error:
 /*
  * Bind socket and return.
  */
-__attribute__((visibility("hidden")))
+LTTNG_HIDDEN
 int lttcomm_bind_inet_sock(struct lttcomm_sock *sock)
 {
-       int ret;
+       return bind(sock->fd,
+                       (const struct sockaddr *) &sock->sockaddr.addr.sin,
+                       sizeof(sock->sockaddr.addr.sin));
+}
 
-       ret = bind(sock->fd, &sock->sockaddr.addr.sin,
+static
+int connect_no_timeout(struct lttcomm_sock *sock)
+{
+       return connect(sock->fd, (struct sockaddr *) &sock->sockaddr.addr.sin,
                        sizeof(sock->sockaddr.addr.sin));
-       if (ret < 0) {
-               PERROR("bind inet");
+}
+
+static
+int connect_with_timeout(struct lttcomm_sock *sock)
+{
+       unsigned long timeout = lttcomm_get_network_timeout();
+       int ret, flags, connect_ret;
+       struct timespec orig_time, cur_time;
+       unsigned long diff_ms;
+
+       ret = fcntl(sock->fd, F_GETFL, 0);
+       if (ret == -1) {
+               PERROR("fcntl");
+               return -1;
        }
+       flags = ret;
 
-       return ret;
+       /* Set socket to nonblock */
+       ret = fcntl(sock->fd, F_SETFL, flags | O_NONBLOCK);
+       if (ret == -1) {
+               PERROR("fcntl");
+               return -1;
+       }
+
+       ret = lttng_clock_gettime(CLOCK_MONOTONIC, &orig_time);
+       if (ret == -1) {
+               PERROR("clock_gettime");
+               return -1;
+       }
+
+       connect_ret = connect(sock->fd,
+               (struct sockaddr *) &sock->sockaddr.addr.sin,
+               sizeof(sock->sockaddr.addr.sin));
+       if (connect_ret == -1 && errno != EAGAIN
+                       && errno != EWOULDBLOCK
+                       && errno != EINPROGRESS) {
+               goto error;
+       } else if (!connect_ret) {
+               /* Connect succeeded */
+               goto success;
+       }
+
+       DBG("Asynchronous connect for sock %d, performing polling with"
+                       " timeout: %lums", sock->fd, timeout);
+       /*
+        * Perform poll loop following EINPROGRESS recommendation from
+        * connect(2) man page.
+        */
+       do {
+               struct pollfd fds;
+
+               fds.fd = sock->fd;
+               fds.events = POLLOUT;
+               fds.revents = 0;
+               ret = poll(&fds, 1, RECONNECT_DELAY);
+               if (ret < 0) {
+                       goto error;
+               } else if (ret > 0) {
+                       int optval;
+                       socklen_t optval_len = sizeof(optval);
+
+                       if (!(fds.revents & POLLOUT)) {
+                               /* Either hup or error */
+                               errno = EPIPE;
+                               goto error;
+                       }
+                       /* Socket is writable: fetch the async connect() result */
+                       ret = getsockopt(sock->fd, SOL_SOCKET,
+                               SO_ERROR, &optval, &optval_len);
+                       if (ret) {
+                               PERROR("getsockopt");
+                               goto error;
+                       }
+                       if (!optval) {
+                               connect_ret = 0;
+                               goto success;
+                       } else {
+                               /* Get actual connect() errno from optval */
+                               errno = optval;
+                               goto error;
+                       }
+               }
+               /* ret == 0: timeout */
+               ret = lttng_clock_gettime(CLOCK_MONOTONIC, &cur_time);
+               if (ret == -1) {
+                       PERROR("clock_gettime");
+                       connect_ret = ret;
+                       goto error;
+               }
+               if (timespec_to_ms(timespec_abs_diff(cur_time, orig_time), &diff_ms) < 0) {
+                       ERR("timespec_to_ms input overflows milliseconds output");
+                       connect_ret = -1;
+                       goto error;
+               }
+       } while (diff_ms < timeout);
+
+       /* Timeout */
+       errno = ETIMEDOUT;
+       connect_ret = -1;
+
+success:
+       /* Restore initial flags */
+       ret = fcntl(sock->fd, F_SETFL, flags);
+       if (ret == -1) {
+               PERROR("fcntl");
+               /* Continue anyway */
+       }
+error:
+       return connect_ret;
 }
 
 /*
  * Connect PF_INET socket.
  */
-__attribute__((visibility("hidden")))
+LTTNG_HIDDEN
 int lttcomm_connect_inet_sock(struct lttcomm_sock *sock)
 {
        int ret, closeret;
 
-       ret = connect(sock->fd, (struct sockaddr *) &sock->sockaddr.addr.sin,
-                       sizeof(sock->sockaddr.addr.sin));
+       if (lttcomm_get_network_timeout()) {
+               ret = connect_with_timeout(sock);
+       } else {
+               ret = connect_no_timeout(sock);
+       }
        if (ret < 0) {
-               /*
-                * Don't print message on connect error, because connect is used in
-                * normal execution to detect if sessiond is alive.
-                */
+               PERROR("connect");
                goto error_connect;
        }
 
@@ -125,12 +254,13 @@ error_connect:
  * Do an accept(2) on the sock and return the new lttcomm socket. The socket
  * MUST be bind(2) before.
  */
-__attribute__((visibility("hidden")))
+LTTNG_HIDDEN
 struct lttcomm_sock *lttcomm_accept_inet_sock(struct lttcomm_sock *sock)
 {
        int new_fd;
        socklen_t len;
        struct lttcomm_sock *new_sock;
+       unsigned long timeout;
 
        if (sock->proto == LTTCOMM_SOCK_UDP) {
                /*
@@ -154,6 +284,19 @@ struct lttcomm_sock *lttcomm_accept_inet_sock(struct lttcomm_sock *sock)
                PERROR("accept inet");
                goto error;
        }
+       timeout = lttcomm_get_network_timeout();
+       if (timeout) {
+               int ret;
+
+               ret = lttcomm_setsockopt_rcv_timeout(new_fd, timeout);
+               if (ret) {
+                       goto error_close;
+               }
+               ret = lttcomm_setsockopt_snd_timeout(new_fd, timeout);
+               if (ret) {
+                       goto error_close;
+               }
+       }
 
        new_sock->fd = new_fd;
        new_sock->ops = &inet_ops;
@@ -161,6 +304,11 @@ struct lttcomm_sock *lttcomm_accept_inet_sock(struct lttcomm_sock *sock)
 end:
        return new_sock;
 
+error_close:
+       if (close(new_fd) < 0) {
+               PERROR("accept inet close fd");
+       }
+
 error:
        free(new_sock);
        return NULL;
@@ -169,7 +317,7 @@ error:
 /*
  * Make the socket listen using LTTNG_SESSIOND_COMM_MAX_LISTEN.
  */
-__attribute__((visibility("hidden")))
+LTTNG_HIDDEN
 int lttcomm_listen_inet_sock(struct lttcomm_sock *sock, int backlog)
 {
        int ret;
@@ -200,7 +348,7 @@ end:
  *
  * Return the size of received data.
  */
-__attribute__((visibility("hidden")))
+LTTNG_HIDDEN
 ssize_t lttcomm_recvmsg_inet_sock(struct lttcomm_sock *sock, void *buf,
                size_t len, int flags)
 {
@@ -223,18 +371,31 @@ ssize_t lttcomm_recvmsg_inet_sock(struct lttcomm_sock *sock, void *buf,
                len_last = iov[0].iov_len;
                ret = recvmsg(sock->fd, &msg, flags);
                if (ret > 0) {
+                       if (flags & MSG_DONTWAIT) {
+                               goto end;
+                       }
                        iov[0].iov_base += ret;
                        iov[0].iov_len -= ret;
                        assert(ret <= len_last);
                }
        } while ((ret > 0 && ret < len_last) || (ret < 0 && errno == EINTR));
+
        if (ret < 0) {
+               if (errno == EAGAIN && flags & MSG_DONTWAIT) {
+                       /*
+                        * EAGAIN is expected in non-blocking mode and should
+                        * not be reported as an error. Moreover, if no data
+                        * was read, 0 must not be returned as it would be
+                        * interpreted as an orderly shutdown of the socket.
+                        */
+                       goto end;
+               }
                PERROR("recvmsg inet");
        } else if (ret > 0) {
                ret = len;
        }
        /* Else ret = 0 meaning an orderly shutdown. */
-
+end:
        return ret;
 }
 
@@ -243,8 +404,8 @@ ssize_t lttcomm_recvmsg_inet_sock(struct lttcomm_sock *sock, void *buf,
  *
  * Return the size of sent data.
  */
-__attribute__((visibility("hidden")))
-ssize_t lttcomm_sendmsg_inet_sock(struct lttcomm_sock *sock, void *buf,
+LTTNG_HIDDEN
+ssize_t lttcomm_sendmsg_inet_sock(struct lttcomm_sock *sock, const void *buf,
                size_t len, int flags)
 {
        struct msghdr msg;
@@ -253,7 +414,7 @@ ssize_t lttcomm_sendmsg_inet_sock(struct lttcomm_sock *sock, void *buf,
 
        memset(&msg, 0, sizeof(msg));
 
-       iov[0].iov_base = buf;
+       iov[0].iov_base = (void *) buf;
        iov[0].iov_len = len;
        msg.msg_iov = iov;
        msg.msg_iovlen = 1;
@@ -286,7 +447,7 @@ ssize_t lttcomm_sendmsg_inet_sock(struct lttcomm_sock *sock, void *buf,
 /*
  * Shutdown cleanly and close.
  */
-__attribute__((visibility("hidden")))
+LTTNG_HIDDEN
 int lttcomm_close_inet_sock(struct lttcomm_sock *sock)
 {
        int ret;
@@ -306,3 +467,81 @@ int lttcomm_close_inet_sock(struct lttcomm_sock *sock)
 
        return ret;
 }
+
+/*
+ * Return the value read from path if positive, else 0 (also on read/parse error).
+ */
+static unsigned long read_proc_value(const char *path)
+{
+       int ret, fd;
+       ssize_t size_ret;
+       long r_val;
+       unsigned long val = 0;
+       char buf[64];
+
+       fd = open(path, O_RDONLY);
+       if (fd < 0) {
+               goto error;
+       }
+
+       size_ret = lttng_read(fd, buf, sizeof(buf));
+       /*
+        * Allow reading a file smaller than buf, but keep space for
+        * final \0.
+        */
+       if (size_ret < 0 || size_ret >= sizeof(buf)) {
+               PERROR("read proc failed");
+               goto error_close;
+       }
+       buf[size_ret] = '\0';
+
+       errno = 0;
+       r_val = strtol(buf, NULL, 10);
+       if (errno != 0 || r_val < -1L) {
+               val = 0;
+               goto error_close;
+       } else {
+               if (r_val > 0) {
+                       val = r_val;
+               }
+       }
+
+error_close:
+       ret = close(fd);
+       if (ret) {
+               PERROR("close /proc value");
+       }
+error:
+       return val;
+}
+
+LTTNG_HIDDEN
+void lttcomm_inet_init(void)
+{
+       unsigned long syn_retries, fin_timeout, syn_timeout, env;
+
+       env = lttcomm_get_network_timeout();
+       if (env) {
+               lttcomm_inet_tcp_timeout = env;
+               goto end;
+       }
+
+       /* Assign default value and see if we can change it. */
+       lttcomm_inet_tcp_timeout = DEFAULT_INET_TCP_TIMEOUT;
+
+       syn_retries = read_proc_value(LTTCOMM_INET_PROC_SYN_RETRIES_PATH);
+       fin_timeout = read_proc_value(LTTCOMM_INET_PROC_FIN_TIMEOUT_PATH);
+
+       syn_timeout = syn_retries * LTTCOMM_INET_SYN_TIMEOUT_FACTOR;
+
+       /*
+        * Take the larger of the two possible timeout values, then take the
+        * larger of that result and the default timeout.
+        */
+       lttcomm_inet_tcp_timeout = max_t(unsigned long,
+                       max_t(unsigned long, syn_timeout, fin_timeout),
+                       lttcomm_inet_tcp_timeout);
+
+end:
+       DBG("TCP inet operation timeout set to %lu sec", lttcomm_inet_tcp_timeout);
+}
This page took 0.028219 seconds and 4 git commands to generate.