+/*
+ * Send the reply structure lur over the unix socket sock.
+ *
+ * Returns 0 when the whole structure was sent, and also when the send
+ * failed with ECONNRESET (the peer is gone, there is nobody left to
+ * reply to). Returns -1 on any other send error or when the number of
+ * bytes sent does not match the size of the reply.
+ *
+ * Diagnostics go through the DBG()/ERR() logging macros like the rest
+ * of this file, not through printf(): standard output belongs to the
+ * program this code runs in.
+ */
+static
+int send_reply(int sock, struct ustcomm_ust_reply *lur)
+{
+	ssize_t len;
+
+	len = ustcomm_send_unix_sock(sock, lur, sizeof(*lur));
+	switch (len) {
+	case sizeof(*lur):
+		DBG("message successfully sent");
+		return 0;
+	case -1:
+		if (errno == ECONNRESET) {
+			ERR("remote end closed connection");
+			return 0;
+		}
+		return -1;
+	default:
+		/* Short (or otherwise unexpected) write. */
+		ERR("incorrect message size: %zd", len);
+		return -1;
+	}
+}
+
+/*
+ * Account for the end of the registration phase on one socket.
+ *
+ * The first call for a given sock_info flags it as posted; later calls
+ * are no-ops until the flag is cleared again. While sem_count is still
+ * positive it is decremented, and the call that brings it down to zero
+ * posts the constructor_wait semaphore.
+ *
+ * Always returns 0.
+ */
+static
+int handle_register_done(struct sock_info *sock_info)
+{
+	if (!sock_info->constructor_sem_posted) {
+		sock_info->constructor_sem_posted = 1;
+		if (uatomic_read(&sem_count) > 0) {
+			int remaining;
+
+			remaining = uatomic_add_return(&sem_count, -1);
+			if (remaining == 0) {
+				int post_ret;
+
+				/* Last expected registration: wake the waiter. */
+				post_ret = sem_post(&constructor_wait);
+				assert(!post_ret);
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Run one command (lum) received on sock and send the reply back.
+ *
+ * The whole function runs with the ust lock held. The command result is
+ * reported to the peer through the reply (ret_val, and ret_code which is
+ * USTCOMM_OK for non-negative results and the negative result itself
+ * otherwise). For stream, channel and metadata commands the reply also
+ * carries the memory map size taken from lum, and, when the command
+ * succeeded, the shm and wait file descriptors are sent right after the
+ * reply.
+ *
+ * The return value is the result of the last send operation (negative
+ * on failure), not the result of the command itself.
+ */
+static
+int handle_message(struct sock_info *sock_info,
+		int sock, struct ustcomm_ust_msg *lum)
+{
+	struct ustcomm_ust_reply lur;
+	const struct lttng_ust_objd_ops *ops;
+	int shm_fd, wait_fd;
+	int has_fds = 0;	/* set when the command produces fds to pass */
+	int ret = 0;
+
+	ust_lock();
+
+	memset(&lur, 0, sizeof(lur));
+
+	if (lttng_ust_comm_should_quit) {
+		ret = -EPERM;
+		goto end;
+	}
+
+	ops = objd_ops(lum->handle);
+	if (!ops) {
+		ret = -ENOENT;
+		goto end;
+	}
+
+	if (lum->cmd == LTTNG_UST_REGISTER_DONE) {
+		/* Only meaningful on the root handle. */
+		if (lum->handle != LTTNG_UST_ROOT_HANDLE)
+			ret = -EINVAL;
+		else
+			ret = handle_register_done(sock_info);
+	} else if (lum->cmd == LTTNG_UST_RELEASE) {
+		/* The root handle cannot be released by the peer. */
+		if (lum->handle != LTTNG_UST_ROOT_HANDLE)
+			ret = lttng_ust_objd_unref(lum->handle);
+		else
+			ret = -EPERM;
+	} else if (ops->cmd) {
+		/* Everything else is delegated to the object's own handler. */
+		ret = ops->cmd(lum->handle, lum->cmd,
+				(unsigned long) &lum->u);
+	} else {
+		ret = -ENOSYS;
+	}
+
+end:
+	lur.handle = lum->handle;
+	lur.cmd = lum->cmd;
+	lur.ret_val = ret;
+	if (ret < 0) {
+		lur.ret_code = ret;
+	} else {
+		lur.ret_code = USTCOMM_OK;
+	}
+
+	/* Command-specific reply payload, taken from the lum.u output. */
+	switch (lum->cmd) {
+	case LTTNG_UST_STREAM:
+		lur.u.stream.memory_map_size = lum->u.stream.memory_map_size;
+		shm_fd = lum->u.stream.shm_fd;
+		wait_fd = lum->u.stream.wait_fd;
+		has_fds = 1;
+		break;
+	case LTTNG_UST_METADATA:
+	case LTTNG_UST_CHANNEL:
+		lur.u.channel.memory_map_size = lum->u.channel.memory_map_size;
+		shm_fd = lum->u.channel.shm_fd;
+		wait_fd = lum->u.channel.wait_fd;
+		has_fds = 1;
+		break;
+	case LTTNG_UST_VERSION:
+		lur.u.version = lum->u.version;
+		break;
+	}
+
+	ret = send_reply(sock, &lur);
+	if (ret < 0) {
+		perror("error sending reply");
+		goto unlock;
+	}
+
+	if (has_fds && lur.ret_code == USTCOMM_OK) {
+		/* we also need to send the file descriptors. */
+		ret = ustcomm_send_fds_unix_sock(sock,
+				&shm_fd, &shm_fd,
+				1, sizeof(int));
+		if (ret < 0) {
+			perror("send shm_fd");
+			goto unlock;
+		}
+		ret = ustcomm_send_fds_unix_sock(sock,
+				&wait_fd, &wait_fd,
+				1, sizeof(int));
+		if (ret < 0) {
+			perror("send wait_fd");
+			goto unlock;
+		}
+	}
+unlock:
+	ust_unlock();
+	return ret;
+}
+
+/*
+ * Release what sock_info holds and put it back in its "unused" state:
+ * the socket is closed, the reference on the root handle is dropped,
+ * the constructor_sem_posted flag is cleared and the one-page wait shm
+ * mapping is unmapped. Each resource is only touched when it is set
+ * (-1 marks an unused socket or handle, NULL an absent mapping).
+ * Failures are logged and otherwise ignored.
+ */
+static
+void cleanup_sock_info(struct sock_info *sock_info)
+{
+	if (sock_info->socket != -1) {
+		if (close(sock_info->socket)) {
+			ERR("Error closing apps socket");
+		}
+		sock_info->socket = -1;
+	}
+
+	if (sock_info->root_handle != -1) {
+		if (lttng_ust_objd_unref(sock_info->root_handle)) {
+			ERR("Error unref root handle");
+		}
+		sock_info->root_handle = -1;
+	}
+
+	sock_info->constructor_sem_posted = 0;
+
+	if (sock_info->wait_shm_mmap) {
+		if (munmap(sock_info->wait_shm_mmap,
+				sysconf(_SC_PAGE_SIZE))) {
+			ERR("Error unmapping wait shm");
+		}
+		sock_info->wait_shm_mmap = NULL;
+	}
+}
+
+/*
+ * Return a read-only file descriptor on the wait shm named by
+ * sock_info->wait_shm_path, creating the shm (truncated to mmap_size)
+ * when it does not exist yet. Returns -1 on failure.
+ *
+ * Using fork to set umask in the child process (not multi-thread safe).
+ * We deal with the shm_open vs ftruncate race (happening when the
+ * sessiond owns the shm and does not let everybody modify it, to ensure
+ * safety against shm_unlink) by simply letting the mmap fail and
+ * retrying after a few seconds.
+ * For global shm, everybody has rw access to it until the sessiond
+ * starts.
+ *
+ * The child is reaped with waitpid() on its own pid, so that an
+ * unrelated child of the host process is never collected by mistake,
+ * and it terminates with _exit(), so that it neither runs the exit
+ * handlers nor flushes the stdio buffers it inherited from the parent.
+ */
+static
+int get_wait_shm(struct sock_info *sock_info, size_t mmap_size)
+{
+	int wait_shm_fd, ret;
+	pid_t pid;
+
+	/*
+	 * Try to open read-only.
+	 */
+	wait_shm_fd = shm_open(sock_info->wait_shm_path, O_RDONLY, 0);
+	if (wait_shm_fd >= 0) {
+		goto end;
+	} else if (errno != ENOENT) {
+		/*
+		 * Read-only open did not work, and it's not because the
+		 * entry was not present. It's a failure that prohibits
+		 * using shm.
+		 */
+		ERR("Error opening shm %s", sock_info->wait_shm_path);
+		goto end;
+	}
+	/*
+	 * If the open failed because the file did not exist, try
+	 * creating it ourself.
+	 */
+	pid = fork();
+	if (pid > 0) {
+		int status;
+		pid_t reaped;
+
+		/*
+		 * Parent: wait for our child (and only our child) to
+		 * return, in which case the shared memory map will have
+		 * been created. Restart the wait when a signal
+		 * interrupts it.
+		 */
+		do {
+			reaped = waitpid(pid, &status, 0);
+		} while (reaped < 0 && errno == EINTR);
+		if (reaped != pid || !WIFEXITED(status)
+				|| WEXITSTATUS(status) != 0) {
+			wait_shm_fd = -1;
+			goto end;
+		}
+		/*
+		 * Try to open read-only again after creation.
+		 */
+		wait_shm_fd = shm_open(sock_info->wait_shm_path, O_RDONLY, 0);
+		if (wait_shm_fd < 0) {
+			/*
+			 * Read-only open did not work. It's a failure
+			 * that prohibits using shm.
+			 */
+			ERR("Error opening shm %s", sock_info->wait_shm_path);
+		}
+		goto end;
+	} else if (pid == 0) {
+		int create_mode;
+
+		/* Child */
+		create_mode = S_IRUSR | S_IWUSR | S_IRGRP;
+		if (sock_info->global)
+			create_mode |= S_IROTH | S_IWGRP | S_IWOTH;
+		/*
+		 * We're alone in a child process, so we can modify the
+		 * process-wide umask.
+		 */
+		umask(~create_mode);
+		/*
+		 * Try creating shm (or get rw access).
+		 * We don't do an exclusive open, because we allow other
+		 * processes to create+ftruncate it concurrently.
+		 */
+		wait_shm_fd = shm_open(sock_info->wait_shm_path,
+				O_RDWR | O_CREAT, create_mode);
+		if (wait_shm_fd >= 0) {
+			ret = ftruncate(wait_shm_fd, mmap_size);
+			if (ret) {
+				PERROR("ftruncate");
+				_exit(EXIT_FAILURE);
+			}
+			_exit(EXIT_SUCCESS);
+		}
+		/*
+		 * For local shm, we need to have rw access to accept
+		 * opening it: this means the local sessiond will be
+		 * able to wake us up. For global shm, we open it even
+		 * if rw access is not granted, because the root.root
+		 * sessiond will be able to override all rights and wake
+		 * us up.
+		 */
+		if (!sock_info->global && errno != EACCES) {
+			ERR("Error opening shm %s", sock_info->wait_shm_path);
+			_exit(EXIT_FAILURE);
+		}
+		/*
+		 * The shm exists, but we cannot open it RW. Report
+		 * success.
+		 */
+		_exit(EXIT_SUCCESS);
+	} else {
+		/* fork() failed. */
+		return -1;
+	}
+end:
+	if (wait_shm_fd >= 0 && !sock_info->global) {
+		struct stat statbuf;
+
+		/*
+		 * Ensure that our user is the owner of the shm file for
+		 * local shm. If we do not own the file, it means our
+		 * sessiond will not have access to wake us up (there is
+		 * probably a rogue process trying to fake our
+		 * sessiond). Fallback to polling method in this case.
+		 */
+		ret = fstat(wait_shm_fd, &statbuf);
+		if (ret) {
+			PERROR("fstat");
+			goto error_close;
+		}
+		if (statbuf.st_uid != getuid())
+			goto error_close;
+	}
+	return wait_shm_fd;
+
+error_close:
+	ret = close(wait_shm_fd);
+	if (ret) {
+		PERROR("Error closing fd");
+	}
+	return -1;
+}
+
+/*
+ * Map one page of the wait shm of sock_info read-only and shared.
+ *
+ * The file descriptor obtained from get_wait_shm() is only needed to
+ * set up the mapping and is closed before returning, whether the
+ * mapping succeeded or not.
+ *
+ * Returns the mapping address, or NULL when the shm could not be
+ * opened or mapped.
+ */
+static
+char *get_map_shm(struct sock_info *sock_info)
+{
+	size_t page_len = sysconf(_SC_PAGE_SIZE);
+	char *mapping;
+	int fd;
+
+	fd = get_wait_shm(sock_info, page_len);
+	if (fd < 0) {
+		return NULL;
+	}
+
+	mapping = mmap(NULL, page_len, PROT_READ, MAP_SHARED, fd, 0);
+	/* close shm fd immediately after taking the mmap reference */
+	if (close(fd)) {
+		PERROR("Error closing fd");
+	}
+	if (mapping == MAP_FAILED) {
+		DBG("mmap error (can be caused by race with sessiond). Fallback to poll mode.");
+		return NULL;
+	}
+	return mapping;
+}
+
+/*
+ * Block until the session daemon signals its presence through the wait
+ * shm of sock_info.
+ *
+ * Under the ust lock, returns right away when the library is quitting
+ * or when the polling fallback is active, and maps the wait shm on
+ * first use (returning if that fails). The lock is dropped before
+ * waiting: if the 32-bit word at the start of the mapping is still 0,
+ * the thread does a FUTEX_WAIT on it.
+ *
+ * A futex failure with EFAULT switches the polling fallback on for
+ * good (wait_poll_fallback); every futex failure is logged.
+ */
+static
+void wait_for_sessiond(struct sock_info *sock_info)
+{
+	int ret;
+
+	ust_lock();
+	if (lttng_ust_comm_should_quit || wait_poll_fallback) {
+		goto unlock;
+	}
+	if (!sock_info->wait_shm_mmap) {
+		sock_info->wait_shm_mmap = get_map_shm(sock_info);
+		if (!sock_info->wait_shm_mmap) {
+			goto unlock;
+		}
+	}
+	ust_unlock();
+
+	DBG("Waiting for %s apps sessiond", sock_info->name);
+	/* Wait for futex wakeup */
+	if (uatomic_read((int32_t *) sock_info->wait_shm_mmap) != 0) {
+		/* Already signalled, nothing to wait for. */
+		return;
+	}
+	ret = futex_async((int32_t *) sock_info->wait_shm_mmap,
+			FUTEX_WAIT, 0, NULL, NULL, 0);
+	if (ret >= 0) {
+		return;
+	}
+	if (errno == EFAULT) {
+		wait_poll_fallback = 1;
+		WARN(
+"Linux kernels 2.6.33 to 3.0 (with the exception of stable versions) "
+"do not support FUTEX_WAKE on read-only memory mappings correctly. "
+"Please upgrade your kernel "
+"(fix is commit 9ea71503a8ed9184d2d0b8ccc4d269d05f7940ae in Linux kernel "
+"mainline). LTTng-UST will use polling mode fallback.");
+	}
+	PERROR("futex");
+	return;
+
+unlock:
+	ust_unlock();
+}
+
+/*
+ * Listener thread body. arg is the struct sock_info describing the
+ * session daemon to talk to. The thread connects to that daemon,
+ * registers the application, then receives commands and hands them to
+ * handle_message() until the connection ends, at which point it starts
+ * over from the connection step. It returns NULL once
+ * lttng_ust_comm_should_quit is seen set at the start of a connection
+ * attempt, or if the root handle cannot be created.
+ *
+ * This thread does not allocate any resource, except within
+ * handle_message, within mutex protection. This mutex protects against
+ * fork and exit.
+ * The other moment it allocates resources is at socket connection, which
+ * is also protected by the mutex.
+ */
+static
+void *ust_listener_thread(void *arg)
+{
+ struct sock_info *sock_info = arg;
+ int sock, ret, prev_connect_failed = 0, has_waited = 0;
+
+ /* Restart trying to connect to the session daemon */
+restart:
+ if (prev_connect_failed) {
+ /* Wait for sessiond availability (see wait_for_sessiond) */
+ wait_for_sessiond(sock_info);
+ if (has_waited) {
+ has_waited = 0;
+ /*
+ * Sleep for 5 seconds before retrying after a
+ * sequence of failure / wait / failure. This
+ * deals with a killed or broken session daemon.
+ * has_waited is set again right below, so every
+ * wait after the first one is followed by this
+ * sleep.
+ */
+ sleep(5);
+ }
+ has_waited = 1;
+ prev_connect_failed = 0;
+ }
+ ust_lock(); /* released on every path before leaving the setup phase */
+
+ if (lttng_ust_comm_should_quit) {
+ ust_unlock();
+ goto quit;
+ }
+
+ if (sock_info->socket != -1) { /* drop the socket of a previous attempt */
+ ret = close(sock_info->socket);
+ if (ret) {
+ ERR("Error closing %s apps socket", sock_info->name);
+ }
+ sock_info->socket = -1;
+ }
+
+ /* Register */
+ ret = ustcomm_connect_unix_sock(sock_info->sock_path);
+ if (ret < 0) {
+ ERR("Error connecting to %s apps socket", sock_info->name);
+ prev_connect_failed = 1;
+ /*
+ * If we cannot find the sessiond daemon, don't delay
+ * constructor execution.
+ */
+ ret = handle_register_done(sock_info);
+ assert(!ret);
+ ust_unlock();
+ goto restart;
+ }
+
+ sock_info->socket = sock = ret; /* non-negative return is the connected socket */
+
+ /*
+ * Create only one root handle per listener thread for the whole
+ * process lifetime.
+ */
+ if (sock_info->root_handle == -1) {
+ ret = lttng_abi_create_root_handle();
+ if (ret < 0) {
+ ERR("Error creating root handle");
+ ust_unlock();
+ goto quit; /* the thread ends here; the socket stays recorded in sock_info */
+ }
+ sock_info->root_handle = ret;
+ }
+
+ ret = register_app_to_sessiond(sock);
+ if (ret < 0) {
+ ERR("Error registering to %s apps socket", sock_info->name);
+ prev_connect_failed = 1;
+ /*
+ * If we cannot register to the sessiond daemon, don't
+ * delay constructor execution.
+ */
+ ret = handle_register_done(sock_info);
+ assert(!ret);
+ ust_unlock();
+ goto restart;
+ }
+ ust_unlock();
+
+ for (;;) { /* command loop: one fixed-size message per iteration */
+ ssize_t len;
+ struct ustcomm_ust_msg lum;
+
+ len = ustcomm_recv_unix_sock(sock, &lum, sizeof(lum));
+ switch (len) {
+ case 0: /* orderly shutdown */
+ DBG("%s ltt-sessiond has performed an orderly shutdown\n", sock_info->name);
+ goto end;
+ case sizeof(lum):
+ DBG("message received\n");
+ ret = handle_message(sock_info, sock, &lum);
+ if (ret < 0) {
+ ERR("Error handling message for %s socket", sock_info->name);
+ }
+ continue; /* keep serving commands even if this one failed */
+ case -1:
+ if (errno == ECONNRESET) {
+ ERR("%s remote end closed connection\n", sock_info->name);
+ goto end;
+ }
+ goto end; /* any other receive error also ends this connection */
+ default:
+ ERR("incorrect message size (%s socket): %zd\n", sock_info->name, len);
+ continue; /* unexpected size: log it and read the next message */
+ }
+
+ }
+end:
+ goto restart; /* try to reconnect */
+quit:
+ return NULL;
+}
+
+/*
+ * Compute how long the constructor should wait for registration.
+ *
+ * The delay, in milliseconds, is LTTNG_UST_DEFAULT_CONSTRUCTOR_TIMEOUT_MS
+ * unless the UST_REGISTER_TIMEOUT environment variable holds a number,
+ * in which case that number is used. A value that does not start with
+ * a number, or that does not fit in a long, is ignored. Any negative
+ * delay means "don't wait" and 0 means "wait forever".
+ *
+ * For a positive delay, *constructor_timeout is set to the current
+ * CLOCK_REALTIME time plus the delay. It is left untouched when the
+ * function returns -1 or 0 without reading the clock.
+ *
+ * Return values: -1: don't wait. 0: wait forever. 1: timeout wait.
+ */
+static
+int get_timeout(struct timespec *constructor_timeout)
+{
+	long constructor_delay_ms = LTTNG_UST_DEFAULT_CONSTRUCTOR_TIMEOUT_MS;
+	char *str_delay;
+	int ret;
+
+	str_delay = getenv("UST_REGISTER_TIMEOUT");
+	if (str_delay) {
+		char *endptr;
+		long parsed;
+
+		errno = 0;
+		parsed = strtol(str_delay, &endptr, 10);
+		/* Keep the default when nothing usable was parsed. */
+		if (endptr != str_delay && errno != ERANGE) {
+			constructor_delay_ms = parsed;
+		}
+	}
+
+	/*
+	 * Negative delays must not reach the arithmetic below: they
+	 * would produce a meaningless deadline.
+	 */
+	if (constructor_delay_ms < 0) {
+		return -1;
+	}
+	if (constructor_delay_ms == 0) {
+		return 0;
+	}
+
+	/*
+	 * If we are unable to find the current time, don't wait.
+	 */
+	ret = clock_gettime(CLOCK_REALTIME, constructor_timeout);
+	if (ret) {
+		return -1;
+	}
+	constructor_timeout->tv_sec += constructor_delay_ms / 1000;
+	constructor_timeout->tv_nsec +=
+		(constructor_delay_ms % 1000) * 1000000L;
+	/* Carry into the seconds when the nanoseconds overflow. */
+	if (constructor_timeout->tv_nsec >= 1000000000L) {
+		constructor_timeout->tv_sec++;
+		constructor_timeout->tv_nsec -= 1000000000L;
+	}
+	return 1;
+}
+