X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fust-app.c;h=225d3f86a81a3837b4acbf9ed23c5d3cf01a9416;hp=bb012b5d84fc0e8b92ace19dde174c2dcb633fef;hb=5d2e1e66a968d9e555f9b8b00d0589ebfaf3de32;hpb=915d047cc8abd8a6e1255dba3e877236f8e8be51 diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c index bb012b5d8..225d3f86a 100644 --- a/src/bin/lttng-sessiond/ust-app.c +++ b/src/bin/lttng-sessiond/ust-app.c @@ -1,19 +1,18 @@ /* * Copyright (C) 2011 - David Goulet * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; only version 2 - * of the License. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2 only, + * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #define _GNU_SOURCE @@ -26,13 +25,171 @@ #include #include #include +#include #include +#include +#include "fd-limit.h" +#include "health.h" #include "ust-app.h" #include "ust-consumer.h" #include "ust-ctl.h" +/* Next available channel key. */ +static unsigned long next_channel_key; + +/* + * Return the atomically incremented value of next_channel_key. 
+ */ +static inline unsigned long get_next_channel_key(void) +{ + return uatomic_add_return(&next_channel_key, 1); +} + +/* + * Return the consumer socket from the given consumer output with the right + * bitness. On error, returns NULL. + * + * The caller MUST acquire a rcu read side lock and keep it until the socket + * object reference is not needed anymore. + */ +static struct consumer_socket *find_consumer_socket_by_bitness(int bits, + struct consumer_output *consumer) +{ + int consumer_fd; + struct consumer_socket *socket = NULL; + + switch (bits) { + case 64: + consumer_fd = uatomic_read(&ust_consumerd64_fd); + break; + case 32: + consumer_fd = uatomic_read(&ust_consumerd32_fd); + break; + default: + assert(0); + goto end; + } + + socket = consumer_find_socket(consumer_fd, consumer); + if (!socket) { + ERR("Consumer socket fd %d not found in consumer obj %p", + consumer_fd, consumer); + } + +end: + return socket; +} + +/* + * Match function for the hash table lookup. + * + * It matches an ust app event based on three attributes which are the event + * name, the filter bytecode and the loglevel. + */ +static int ht_match_ust_app_event(struct cds_lfht_node *node, const void *_key) +{ + struct ust_app_event *event; + const struct ust_app_ht_key *key; + + assert(node); + assert(_key); + + event = caa_container_of(node, struct ust_app_event, node.node); + key = _key; + + /* Match the 3 elements of the key: name, filter and loglevel. */ + + /* Event name */ + if (strncmp(event->attr.name, key->name, sizeof(event->attr.name)) != 0) { + goto no_match; + } + + /* Event loglevel. */ + if (event->attr.loglevel != key->loglevel) { + if (event->attr.loglevel_type == LTTNG_UST_LOGLEVEL_ALL + && key->loglevel == 0 && event->attr.loglevel == -1) { + /* + * Match is accepted. 
This is because on event creation, the + * loglevel is set to -1 if the event loglevel type is ALL so 0 and + * -1 are accepted for this loglevel type since 0 is the one set by + * the API when receiving an enable event. + */ + } else { + goto no_match; + } + } + + /* One of the filters is NULL, fail. */ + if ((key->filter && !event->filter) || (!key->filter && event->filter)) { + goto no_match; + } + + if (key->filter && event->filter) { + /* Both filters exists, check length followed by the bytecode. */ + if (event->filter->len != key->filter->len || + memcmp(event->filter->data, key->filter->data, + event->filter->len) != 0) { + goto no_match; + } + } + + /* Match. */ + return 1; + +no_match: + return 0; +} + +/* + * Unique add of an ust app event in the given ht. This uses the custom + * ht_match_ust_app_event match function and the event name as hash. + */ +static void add_unique_ust_app_event(struct ust_app_channel *ua_chan, + struct ust_app_event *event) +{ + struct cds_lfht_node *node_ptr; + struct ust_app_ht_key key; + struct lttng_ht *ht; + + assert(ua_chan); + assert(ua_chan->events); + assert(event); + + ht = ua_chan->events; + key.name = event->attr.name; + key.filter = event->filter; + key.loglevel = event->attr.loglevel; + + node_ptr = cds_lfht_add_unique(ht->ht, + ht->hash_fct(event->node.key, lttng_ht_seed), + ht_match_ust_app_event, &key, &event->node.node); + assert(node_ptr == &event->node.node); +} + +/* + * Close the notify socket from the given RCU head object. This MUST be called + * through a call_rcu(). + */ +static void close_notify_sock_rcu(struct rcu_head *head) +{ + int ret; + struct ust_app_notify_sock_obj *obj = + caa_container_of(head, struct ust_app_notify_sock_obj, head); + + /* Must have a valid fd here. */ + assert(obj->fd >= 0); + + ret = close(obj->fd); + if (ret) { + ERR("close notify sock %d RCU", obj->fd); + } + lttng_fd_put(LTTNG_FD_APPS, 1); + + free(obj); +} + /* * Delete ust context safely. 
RCU read lock must be held before calling * this function. @@ -40,8 +197,16 @@ static void delete_ust_app_ctx(int sock, struct ust_app_ctx *ua_ctx) { + int ret; + + assert(ua_ctx); + if (ua_ctx->obj) { - ustctl_release_object(sock, ua_ctx->obj); + ret = ustctl_release_object(sock, ua_ctx->obj); + if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app sock %d release ctx obj handle %d failed with ret %d", + sock, ua_ctx->obj->handle, ret); + } free(ua_ctx->obj); } free(ua_ctx); @@ -55,20 +220,17 @@ static void delete_ust_app_event(int sock, struct ust_app_event *ua_event) { int ret; - struct lttng_ht_iter iter; - struct ust_app_ctx *ua_ctx; - /* Destroy each context of event */ - cds_lfht_for_each_entry(ua_event->ctx->ht, &iter.iter, ua_ctx, - node.node) { - ret = lttng_ht_del(ua_event->ctx, &iter); - assert(!ret); - delete_ust_app_ctx(sock, ua_ctx); - } - lttng_ht_destroy(ua_event->ctx); + assert(ua_event); + + free(ua_event->filter); if (ua_event->obj != NULL) { - ustctl_release_object(sock, ua_event->obj); + ret = ustctl_release_object(sock, ua_event->obj); + if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app sock %d release event obj failed with ret %d", + sock, ret); + } free(ua_event->obj); } free(ua_event); @@ -79,10 +241,19 @@ void delete_ust_app_event(int sock, struct ust_app_event *ua_event) * this function. 
*/ static -void delete_ust_app_stream(int sock, struct ltt_ust_stream *stream) +void delete_ust_app_stream(int sock, struct ust_app_stream *stream) { + int ret; + + assert(stream); + if (stream->obj) { - ustctl_release_object(sock, stream->obj); + ret = ustctl_release_object(sock, stream->obj); + if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app sock %d release stream obj failed with ret %d", + sock, ret); + } + lttng_fd_put(LTTNG_FD_APPS, 2); free(stream->obj); } free(stream); @@ -93,13 +264,18 @@ void delete_ust_app_stream(int sock, struct ltt_ust_stream *stream) * this function. */ static -void delete_ust_app_channel(int sock, struct ust_app_channel *ua_chan) +void delete_ust_app_channel(int sock, struct ust_app_channel *ua_chan, + struct ust_app *app) { int ret; struct lttng_ht_iter iter; struct ust_app_event *ua_event; struct ust_app_ctx *ua_ctx; - struct ltt_ust_stream *stream, *stmp; + struct ust_app_stream *stream, *stmp; + + assert(ua_chan); + + DBG3("UST app deleting channel %s", ua_chan->name); /* Wipe stream */ cds_list_for_each_entry_safe(stream, stmp, &ua_chan->streams.head, list) { @@ -124,45 +300,202 @@ void delete_ust_app_channel(int sock, struct ust_app_channel *ua_chan) } lttng_ht_destroy(ua_chan->events); + /* Wipe and free registry. */ + ust_registry_channel_destroy(&ua_chan->session->registry, &ua_chan->registry); + if (ua_chan->obj != NULL) { - ustctl_release_object(sock, ua_chan->obj); + /* Remove channel from application UST object descriptor. */ + iter.iter.node = &ua_chan->ust_objd_node.node; + lttng_ht_del(app->ust_objd, &iter); + ret = ustctl_release_object(sock, ua_chan->obj); + if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app sock %d release channel obj failed with ret %d", + sock, ret); + } + lttng_fd_put(LTTNG_FD_APPS, 2); free(ua_chan->obj); } free(ua_chan); } +/* + * For a given application and session, push metadata to consumer. 
The session + * lock MUST be acquired here before calling this. + * + * Return 0 on success else a negative error. + */ +static int push_metadata(struct ust_app *app, struct ust_app_session *ua_sess) +{ + int ret; + char *metadata_str = NULL; + size_t len, offset; + struct consumer_socket *socket; + + assert(app); + assert(ua_sess); + + if (!ua_sess->consumer || !ua_sess->metadata) { + /* No consumer means no stream associated so just return gracefully. */ + ret = 0; + goto end; + } + + rcu_read_lock(); + + /* Get consumer socket to use to push the metadata.*/ + socket = find_consumer_socket_by_bitness(app->bits_per_long, + ua_sess->consumer); + if (!socket) { + ret = -1; + goto error_rcu_unlock; + } + + /* + * TODO: Currently, we hold the socket lock around sampling of the next + * metadata segment to ensure we send metadata over the consumer socket in + * the correct order. This makes the registry lock nest inside the socket + * lock. + * + * Please note that this is a temporary measure: we should move this lock + * back into ust_consumer_push_metadata() when the consumer gets the + * ability to reorder the metadata it receives. + */ + pthread_mutex_lock(socket->lock); + pthread_mutex_lock(&ua_sess->registry.lock); + + offset = ua_sess->registry.metadata_len_sent; + len = ua_sess->registry.metadata_len - ua_sess->registry.metadata_len_sent; + if (len == 0) { + DBG3("No metadata to push for session id %d", ua_sess->id); + ret = 0; + goto error_reg_unlock; + } + assert(len > 0); + + /* Allocate only what we have to send. */ + metadata_str = zmalloc(len); + if (!metadata_str) { + PERROR("zmalloc ust app metadata string"); + ret = -ENOMEM; + goto error_reg_unlock; + } + /* Copy what we haven't send out. 
*/ + memcpy(metadata_str, ua_sess->registry.metadata + offset, len); + + pthread_mutex_unlock(&ua_sess->registry.lock); + + ret = ust_consumer_push_metadata(socket, ua_sess, metadata_str, len, + offset); + if (ret < 0) { + pthread_mutex_unlock(socket->lock); + goto error_rcu_unlock; + } + + /* Update len sent of the registry. */ + pthread_mutex_lock(&ua_sess->registry.lock); + ua_sess->registry.metadata_len_sent += len; + pthread_mutex_unlock(&ua_sess->registry.lock); + pthread_mutex_unlock(socket->lock); + + rcu_read_unlock(); + free(metadata_str); + return 0; + +error_reg_unlock: + pthread_mutex_unlock(&ua_sess->registry.lock); + pthread_mutex_unlock(socket->lock); +error_rcu_unlock: + rcu_read_unlock(); + free(metadata_str); +end: + return ret; +} + +/* + * Send to the consumer a close metadata command for the given session. Once + * done, the metadata channel is deleted and the session metadata pointer is + * nullified. The session lock MUST be acquired here unless the application is + * in the destroy path. + * + * Return 0 on success else a negative value. + */ +static int close_metadata(struct ust_app *app, struct ust_app_session *ua_sess) +{ + int ret; + struct consumer_socket *socket; + + assert(app); + assert(ua_sess); + + /* Ignore if no metadata. Valid since it can be called on unregister. */ + if (!ua_sess->metadata) { + ret = 0; + goto error; + } + + rcu_read_lock(); + + /* Get consumer socket to use to push the metadata.*/ + socket = find_consumer_socket_by_bitness(app->bits_per_long, + ua_sess->consumer); + if (!socket) { + ret = -1; + goto error_rcu_unlock; + } + + ret = ust_consumer_close_metadata(socket, ua_sess->metadata); + if (ret < 0) { + goto error_rcu_unlock; + } + +error_rcu_unlock: + /* Destroy metadata on our side since we must not use it anymore. */ + delete_ust_app_channel(-1, ua_sess->metadata, app); + ua_sess->metadata = NULL; + + rcu_read_unlock(); +error: + return ret; +} + /* * Delete ust app session safely. 
RCU read lock must be held before calling * this function. */ static -void delete_ust_app_session(int sock, struct ust_app_session *ua_sess) +void delete_ust_app_session(int sock, struct ust_app_session *ua_sess, + struct ust_app *app) { int ret; struct lttng_ht_iter iter; struct ust_app_channel *ua_chan; + assert(ua_sess); + if (ua_sess->metadata) { - if (ua_sess->metadata->stream_obj) { - ustctl_release_object(sock, ua_sess->metadata->stream_obj); - free(ua_sess->metadata->stream_obj); - } - if (ua_sess->metadata->obj) { - ustctl_release_object(sock, ua_sess->metadata->obj); - free(ua_sess->metadata->obj); - } + /* Push metadata for application before freeing the application. */ + (void) push_metadata(app, ua_sess); + + /* And ask to close it for this session. */ + (void) close_metadata(app, ua_sess); } cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan, node.node) { ret = lttng_ht_del(ua_sess->channels, &iter); assert(!ret); - delete_ust_app_channel(sock, ua_chan); + delete_ust_app_channel(sock, ua_chan, app); } lttng_ht_destroy(ua_sess->channels); + ust_registry_session_destroy(&ua_sess->registry); + if (ua_sess->handle != -1) { - ustctl_release_handle(sock, ua_sess->handle); + ret = ustctl_release_handle(sock, ua_sess->handle); + if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app sock %d release session handle failed with ret %d", + sock, ret); + } } free(ua_sess); } @@ -175,32 +508,43 @@ static void delete_ust_app(struct ust_app *app) { int ret, sock; - struct lttng_ht_iter iter; - struct ust_app_session *ua_sess; + struct ust_app_session *ua_sess, *tmp_ua_sess; rcu_read_lock(); /* Delete ust app sessions info */ - sock = app->key.sock; - app->key.sock = -1; + sock = app->sock; + app->sock = -1; + + lttng_ht_destroy(app->sessions); /* Wipe sessions */ - cds_lfht_for_each_entry(app->sessions->ht, &iter.iter, ua_sess, - node.node) { - ret = lttng_ht_del(app->sessions, &iter); - assert(!ret); - 
delete_ust_app_session(app->key.sock, ua_sess); + cds_list_for_each_entry_safe(ua_sess, tmp_ua_sess, &app->teardown_head, + teardown_node) { + /* Free every object in the session and the session. */ + delete_ust_app_session(sock, ua_sess, app); } - lttng_ht_destroy(app->sessions); /* - * Wait until we have removed the key from the sock hash table before - * closing this socket, otherwise an application could re-use the socket ID - * and race with the teardown, using the same hash table entry. + * Wait until we have deleted the application from the sock hash table + * before closing this socket, otherwise an application could re-use the + * socket ID and race with the teardown, using the same hash table entry. + * + * It's OK to leave the close in call_rcu. We want it to stay unique for + * all RCU readers that could run concurrently with unregister app, + * therefore we _need_ to only close that socket after a grace period. So + * it should stay in this RCU callback. + * + * This close() is a very important step of the synchronization model so + * every modification to this function must be carefully reviewed. */ - close(sock); + ret = close(sock); + if (ret) { + PERROR("close"); + } + lttng_fd_put(LTTNG_FD_APPS, 1); - DBG2("UST app pid %d deleted", app->key.pid); + DBG2("UST app pid %d deleted", app->pid); free(app); rcu_read_unlock(); @@ -215,17 +559,44 @@ void delete_ust_app_rcu(struct rcu_head *head) struct lttng_ht_node_ulong *node = caa_container_of(head, struct lttng_ht_node_ulong, head); struct ust_app *app = - caa_container_of(node, struct ust_app, node); + caa_container_of(node, struct ust_app, pid_n); - DBG3("Call RCU deleting app PID %d", app->key.pid); + DBG3("Call RCU deleting app PID %d", app->pid); delete_ust_app(app); } +/* + * Delete the session from the application ht and delete the data structure by + * freeing every object inside and releasing them. 
+ */ +static void destroy_app_session(struct ust_app *app, + struct ust_app_session *ua_sess) +{ + int ret; + struct lttng_ht_iter iter; + + assert(app); + assert(ua_sess); + + iter.iter.node = &ua_sess->node.node; + ret = lttng_ht_del(app->sessions, &iter); + if (ret) { + /* Already scheduled for teardown. */ + goto end; + } + + /* Once deleted, free the data structure. */ + delete_ust_app_session(app->sock, ua_sess, app); + +end: + return; +} + /* * Alloc new UST app session. */ static -struct ust_app_session *alloc_ust_app_session(void) +struct ust_app_session *alloc_ust_app_session(struct ust_app *app) { struct ust_app_session *ua_sess; @@ -233,15 +604,28 @@ struct ust_app_session *alloc_ust_app_session(void) ua_sess = zmalloc(sizeof(struct ust_app_session)); if (ua_sess == NULL) { PERROR("malloc"); - goto error; + goto error_free; } ua_sess->handle = -1; ua_sess->channels = lttng_ht_new(0, LTTNG_HT_TYPE_STRING); + pthread_mutex_init(&ua_sess->lock, NULL); + if (ust_registry_session_init(&ua_sess->registry, app, + app->bits_per_long, + app->uint8_t_alignment, + app->uint16_t_alignment, + app->uint32_t_alignment, + app->uint64_t_alignment, + app->long_alignment, + app->byte_order)) { + goto error; + } return ua_sess; error: + free(ua_sess); +error_free: return NULL; } @@ -250,7 +634,8 @@ error: */ static struct ust_app_channel *alloc_ust_app_channel(char *name, - struct lttng_ust_channel *attr) + struct ust_app_session *ua_sess, + struct lttng_ust_channel_attr *attr) { struct ust_app_channel *ua_chan; @@ -267,16 +652,28 @@ struct ust_app_channel *alloc_ust_app_channel(char *name, ua_chan->enabled = 1; ua_chan->handle = -1; + ua_chan->key = get_next_channel_key(); ua_chan->ctx = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); ua_chan->events = lttng_ht_new(0, LTTNG_HT_TYPE_STRING); lttng_ht_node_init_str(&ua_chan->node, ua_chan->name); CDS_INIT_LIST_HEAD(&ua_chan->streams.head); + /* Initialize UST registry. 
*/ + ust_registry_channel_init(&ua_sess->registry, &ua_chan->registry); + /* Copy attributes */ if (attr) { - memcpy(&ua_chan->attr, attr, sizeof(ua_chan->attr)); + /* Translate from lttng_ust_channel to ustctl_consumer_channel_attr. */ + ua_chan->attr.subbuf_size = attr->subbuf_size; + ua_chan->attr.num_subbuf = attr->num_subbuf; + ua_chan->attr.overwrite = attr->overwrite; + ua_chan->attr.switch_timer_interval = attr->switch_timer_interval; + ua_chan->attr.read_timer_interval = attr->read_timer_interval; + ua_chan->attr.output = attr->output; } + /* By default, the channel is a per cpu channel. */ + ua_chan->attr.type = LTTNG_UST_CHAN_PER_CPU; DBG3("UST app channel %s allocated", ua_chan->name); @@ -286,6 +683,28 @@ error: return NULL; } +/* + * Allocate and initialize a UST app stream. + * + * Return newly allocated stream pointer or NULL on error. + */ +struct ust_app_stream *ust_app_alloc_stream(void) +{ + struct ust_app_stream *stream = NULL; + + stream = zmalloc(sizeof(*stream)); + if (stream == NULL) { + PERROR("zmalloc ust app stream"); + goto error; + } + + /* Zero could be a valid value for a handle so flag it to -1. */ + stream->handle = -1; + +error: + return stream; +} + /* * Alloc new UST app event. */ @@ -305,7 +724,6 @@ struct ust_app_event *alloc_ust_app_event(char *name, ua_event->enabled = 1; strncpy(ua_event->name, name, sizeof(ua_event->name)); ua_event->name[sizeof(ua_event->name) - 1] = '\0'; - ua_event->ctx = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); lttng_ht_node_init_str(&ua_event->node, ua_event->name); /* Copy attributes */ @@ -344,6 +762,29 @@ error: return ua_ctx; } +/* + * Allocate a filter and copy the given original filter. + * + * Return allocated filter or NULL on error. 
+ */ +static struct lttng_ust_filter_bytecode *alloc_copy_ust_app_filter( + struct lttng_ust_filter_bytecode *orig_f) +{ + struct lttng_ust_filter_bytecode *filter = NULL; + + /* Copy filter bytecode */ + filter = zmalloc(sizeof(*filter) + orig_f->len); + if (!filter) { + PERROR("zmalloc alloc ust app filter"); + goto error; + } + + memcpy(filter, orig_f, sizeof(*filter) + orig_f->len); + +error: + return filter; +} + /* * Find an ust_app using the sock and return it. RCU read side lock must be * held before calling this helper function. @@ -352,73 +793,147 @@ static struct ust_app *find_app_by_sock(int sock) { struct lttng_ht_node_ulong *node; - struct ust_app_key *key; struct lttng_ht_iter iter; - lttng_ht_lookup(ust_app_sock_key_map, (void *)((unsigned long) sock), - &iter); - node = lttng_ht_iter_get_node_ulong(&iter); - if (node == NULL) { - DBG2("UST app find by sock %d key not found", sock); - goto error; - } - key = caa_container_of(node, struct ust_app_key, node); - - lttng_ht_lookup(ust_app_ht, (void *)((unsigned long) key->pid), &iter); + lttng_ht_lookup(ust_app_ht_by_sock, (void *)((unsigned long) sock), &iter); node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { DBG2("UST app find by sock %d not found", sock); goto error; } - return caa_container_of(node, struct ust_app, node); + + return caa_container_of(node, struct ust_app, sock_n); error: return NULL; } /* - * Create the channel context on the tracer. + * Find an ust_app using the notify sock and return it. RCU read side lock must + * be held before calling this helper function. 
*/ -static -int create_ust_channel_context(struct ust_app_channel *ua_chan, - struct ust_app_ctx *ua_ctx, struct ust_app *app) +static struct ust_app *find_app_by_notify_sock(int sock) { - int ret; + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter iter; - ret = ustctl_add_context(app->key.sock, &ua_ctx->ctx, - ua_chan->obj, &ua_ctx->obj); - if (ret < 0) { + lttng_ht_lookup(ust_app_ht_by_notify_sock, (void *)((unsigned long) sock), + &iter); + node = lttng_ht_iter_get_node_ulong(&iter); + if (node == NULL) { + DBG2("UST app find by notify sock %d not found", sock); goto error; } - ua_ctx->handle = ua_ctx->obj->handle; - - DBG2("UST app context created successfully for channel %s", ua_chan->name); + return caa_container_of(node, struct ust_app, notify_sock_n); error: - return ret; + return NULL; } /* - * Create the event context on the tracer. + * Lookup for an ust app event based on event name, filter bytecode and the + * event loglevel. + * + * Return an ust_app_event object or NULL on error. */ -static -int create_ust_event_context(struct ust_app_event *ua_event, - struct ust_app_ctx *ua_ctx, struct ust_app *app) +static struct ust_app_event *find_ust_app_event(struct lttng_ht *ht, + char *name, struct lttng_ust_filter_bytecode *filter, int loglevel) { - int ret; + struct lttng_ht_iter iter; + struct lttng_ht_node_str *node; + struct ust_app_event *event = NULL; + struct ust_app_ht_key key; + + assert(name); + assert(ht); + + /* Setup key for event lookup. */ + key.name = name; + key.filter = filter; + key.loglevel = loglevel; + + /* Lookup using the event name as hash and a custom match fct. 
*/ + cds_lfht_lookup(ht->ht, ht->hash_fct((void *) name, lttng_ht_seed), + ht_match_ust_app_event, &key, &iter.iter); + node = lttng_ht_iter_get_node_str(&iter); + if (node == NULL) { + goto end; + } - ret = ustctl_add_context(app->key.sock, &ua_ctx->ctx, - ua_event->obj, &ua_ctx->obj); + event = caa_container_of(node, struct ust_app_event, node); + +end: + return event; +} + +/* + * Create the channel context on the tracer. + * + * Called with UST app session lock held. + */ +static +int create_ust_channel_context(struct ust_app_channel *ua_chan, + struct ust_app_ctx *ua_ctx, struct ust_app *app) +{ + int ret; + + health_code_update(); + + ret = ustctl_add_context(app->sock, &ua_ctx->ctx, + ua_chan->obj, &ua_ctx->obj); if (ret < 0) { + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app create channel context failed for app (pid: %d) " + "with ret %d", app->pid, ret); + } else { + DBG3("UST app disable event failed. Application is dead."); + } goto error; } ua_ctx->handle = ua_ctx->obj->handle; - DBG2("UST app context created successfully for event %s", ua_event->name); + DBG2("UST app context handle %d created successfully for channel %s", + ua_ctx->handle, ua_chan->name); + +error: + health_code_update(); + return ret; +} + +/* + * Set the filter on the tracer. + */ +static +int set_ust_event_filter(struct ust_app_event *ua_event, + struct ust_app *app) +{ + int ret; + + health_code_update(); + + if (!ua_event->filter) { + ret = 0; + goto error; + } + + ret = ustctl_set_filter(app->sock, ua_event->filter, + ua_event->obj); + if (ret < 0) { + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app event %s filter failed for app (pid: %d) " + "with ret %d", ua_event->attr.name, app->pid, ret); + } else { + DBG3("UST app filter event failed. 
Application is dead."); + } + goto error; + } + + DBG2("UST filter set successfully for event %s", ua_event->name); error: + health_code_update(); return ret; } @@ -430,18 +945,25 @@ static int disable_ust_event(struct ust_app *app, { int ret; - ret = ustctl_disable(app->key.sock, ua_event->obj); + health_code_update(); + + ret = ustctl_disable(app->sock, ua_event->obj); if (ret < 0) { - ERR("UST app event %s disable failed for app (pid: %d) " - "and session handle %d with ret %d", - ua_event->attr.name, app->key.pid, ua_sess->handle, ret); + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app event %s disable failed for app (pid: %d) " + "and session handle %d with ret %d", + ua_event->attr.name, app->pid, ua_sess->handle, ret); + } else { + DBG3("UST app disable event failed. Application is dead."); + } goto error; } DBG2("UST app event %s disabled successfully for app (pid: %d)", - ua_event->attr.name, app->key.pid); + ua_event->attr.name, app->pid); error: + health_code_update(); return ret; } @@ -453,18 +975,25 @@ static int disable_ust_channel(struct ust_app *app, { int ret; - ret = ustctl_disable(app->key.sock, ua_chan->obj); + health_code_update(); + + ret = ustctl_disable(app->sock, ua_chan->obj); if (ret < 0) { - ERR("UST app channel %s disable failed for app (pid: %d) " - "and session handle %d with ret %d", - ua_chan->name, app->key.pid, ua_sess->handle, ret); + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app channel %s disable failed for app (pid: %d) " + "and session handle %d with ret %d", + ua_chan->name, app->pid, ua_sess->handle, ret); + } else { + DBG3("UST app disable channel failed. 
Application is dead."); + } goto error; } DBG2("UST app channel %s disabled successfully for app (pid: %d)", - ua_chan->name, app->key.pid); + ua_chan->name, app->pid); error: + health_code_update(); return ret; } @@ -476,20 +1005,27 @@ static int enable_ust_channel(struct ust_app *app, { int ret; - ret = ustctl_enable(app->key.sock, ua_chan->obj); + health_code_update(); + + ret = ustctl_enable(app->sock, ua_chan->obj); if (ret < 0) { - ERR("UST app channel %s enable failed for app (pid: %d) " - "and session handle %d with ret %d", - ua_chan->name, app->key.pid, ua_sess->handle, ret); + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app channel %s enable failed for app (pid: %d) " + "and session handle %d with ret %d", + ua_chan->name, app->pid, ua_sess->handle, ret); + } else { + DBG3("UST app enable channel failed. Application is dead."); + } goto error; } ua_chan->enabled = 1; DBG2("UST app channel %s enabled successfully for app (pid: %d)", - ua_chan->name, app->key.pid); + ua_chan->name, app->pid); error: + health_code_update(); return ret; } @@ -501,96 +1037,121 @@ static int enable_ust_event(struct ust_app *app, { int ret; - ret = ustctl_enable(app->key.sock, ua_event->obj); + health_code_update(); + + ret = ustctl_enable(app->sock, ua_event->obj); if (ret < 0) { - ERR("UST app event %s enable failed for app (pid: %d) " - "and session handle %d with ret %d", - ua_event->attr.name, app->key.pid, ua_sess->handle, ret); + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app event %s enable failed for app (pid: %d) " + "and session handle %d with ret %d", + ua_event->attr.name, app->pid, ua_sess->handle, ret); + } else { + DBG3("UST app enable event failed. 
Application is dead."); + } goto error; } DBG2("UST app event %s enabled successfully for app (pid: %d)", - ua_event->attr.name, app->key.pid); + ua_event->attr.name, app->pid); error: + health_code_update(); return ret; } /* - * Open metadata onto the UST tracer for a UST session. + * Create the specified channel onto the UST tracer for a UST session. This + * MUST be called with UST app session lock held. + * + * Return 0 on success. On error, a negative value is returned. */ -static int open_ust_metadata(struct ust_app *app, - struct ust_app_session *ua_sess) +static int create_ust_channel(struct ust_app *app, + struct ust_app_session *ua_sess, struct ust_app_channel *ua_chan, + struct consumer_output *consumer) { int ret; - struct lttng_ust_channel_attr uattr; - - uattr.overwrite = ua_sess->metadata->attr.overwrite; - uattr.subbuf_size = ua_sess->metadata->attr.subbuf_size; - uattr.num_subbuf = ua_sess->metadata->attr.num_subbuf; - uattr.switch_timer_interval = - ua_sess->metadata->attr.switch_timer_interval; - uattr.read_timer_interval = - ua_sess->metadata->attr.read_timer_interval; - uattr.output = ua_sess->metadata->attr.output; - - /* UST tracer metadata creation */ - ret = ustctl_open_metadata(app->key.sock, ua_sess->handle, &uattr, - &ua_sess->metadata->obj); - if (ret < 0) { - ERR("UST app open metadata failed for app pid:%d with ret %d", - app->key.pid, ret); + unsigned int nb_fd = 0; + struct consumer_socket *socket; + struct ust_app_stream *stream, *stmp; + + assert(app); + assert(ua_sess); + assert(ua_chan); + assert(consumer); + + rcu_read_lock(); + health_code_update(); + + /* Get the right consumer socket for the application. */ + socket = find_consumer_socket_by_bitness(app->bits_per_long, consumer); + if (!socket) { + ret = -1; goto error; } - ua_sess->metadata->handle = ua_sess->metadata->obj->handle; + health_code_update(); -error: - return ret; -} + /* + * Ask consumer to create channel. 
The consumer will return the number of + * stream we have to expect. + */ + ret = ust_consumer_ask_channel(ua_sess, ua_chan, consumer, socket); + if (ret < 0) { + goto error; + } -/* - * Create stream onto the UST tracer for a UST session. - */ -static int create_ust_stream(struct ust_app *app, - struct ust_app_session *ua_sess) -{ - int ret; + /* + * Compute the number of fd needed before receiving them. It must be 2 per + * stream (2 being the default value here). + */ + nb_fd = DEFAULT_UST_STREAM_FD_NUM * ua_chan->expected_stream_count; - ret = ustctl_create_stream(app->key.sock, ua_sess->metadata->obj, - &ua_sess->metadata->stream_obj); + /* Reserve the amount of file descriptor we need. */ + ret = lttng_fd_get(LTTNG_FD_APPS, nb_fd); if (ret < 0) { - ERR("UST create metadata stream failed"); - goto error; + ERR("Exhausted number of available FD upon create channel"); + goto error_fd_get; } -error: - return ret; -} + health_code_update(); -/* - * Create the specified channel onto the UST tracer for a UST session. - */ -static int create_ust_channel(struct ust_app *app, - struct ust_app_session *ua_sess, struct ust_app_channel *ua_chan) -{ - int ret; + /* + * Now get the channel from the consumer. This call wil populate the stream + * list of that channel and set the ust object. + */ + ret = ust_consumer_get_channel(socket, ua_chan); + if (ret < 0) { + goto error_destroy; + } - /* TODO: remove cast and use lttng-ust-abi.h */ - ret = ustctl_create_channel(app->key.sock, ua_sess->handle, - (struct lttng_ust_channel_attr *)&ua_chan->attr, &ua_chan->obj); + /* Send channel to the application. */ + ret = ust_consumer_send_channel_to_ust(app, ua_sess, ua_chan); if (ret < 0) { - ERR("Creating channel %s for app (pid: %d, sock: %d) " - "and session handle %d with ret %d", - ua_chan->name, app->key.pid, app->key.sock, - ua_sess->handle, ret); goto error; } - ua_chan->handle = ua_chan->obj->handle; + health_code_update(); + + /* Send all streams to application. 
*/ + cds_list_for_each_entry_safe(stream, stmp, &ua_chan->streams.head, list) { + ret = ust_consumer_send_stream_to_ust(app, ua_chan, stream); + if (ret < 0) { + goto error; + } + /* We don't need the stream anymore once sent to the tracer. */ + cds_list_del(&stream->list); + delete_ust_app_stream(-1, stream); + } + + /* Flag the channel that it is sent to the application. */ + ua_chan->is_sent = 1; + /* Assign session to channel. */ + ua_chan->session = ua_sess; + /* Initialize ust objd object using the received handle and add it. */ + lttng_ht_node_init_ulong(&ua_chan->ust_objd_node, ua_chan->handle); + lttng_ht_add_unique_ulong(app->ust_objd, &ua_chan->ust_objd_node); - DBG2("UST app channel %s created successfully for pid:%d and sock:%d", - ua_chan->name, app->key.pid, app->key.sock); + health_code_update(); /* If channel is not enabled, disable it on the tracer */ if (!ua_chan->enabled) { @@ -600,12 +1161,29 @@ static int create_ust_channel(struct ust_app *app, } } + rcu_read_unlock(); + return 0; + +error_destroy: + lttng_fd_put(LTTNG_FD_APPS, nb_fd); +error_fd_get: + /* + * Initiate a destroy channel on the consumer since we had an error + * handling it on our side. The return value is of no importance since we + * already have a ret value set by the previous error that we need to + * return. + */ + (void) ust_consumer_destroy_channel(socket, ua_chan); error: + health_code_update(); + rcu_read_unlock(); return ret; } /* * Create the specified event onto the UST tracer for a UST session. + * + * Should be called with session mutex held. 
*/ static int create_ust_event(struct ust_app *app, struct ust_app_session *ua_sess, @@ -613,23 +1191,35 @@ int create_ust_event(struct ust_app *app, struct ust_app_session *ua_sess, { int ret = 0; + health_code_update(); + /* Create UST event on tracer */ - ret = ustctl_create_event(app->key.sock, &ua_event->attr, ua_chan->obj, + ret = ustctl_create_event(app->sock, &ua_event->attr, ua_chan->obj, &ua_event->obj); if (ret < 0) { - if (ret == -EEXIST) { - ret = 0; - goto error; + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("Error ustctl create event %s for app pid: %d with ret %d", + ua_event->attr.name, app->pid, ret); + } else { + DBG3("UST app create event failed. Application is dead."); } - ERR("Error ustctl create event %s for app pid: %d with ret %d", - ua_event->attr.name, app->key.pid, ret); goto error; } ua_event->handle = ua_event->obj->handle; DBG2("UST app event %s created successfully for pid:%d", - ua_event->attr.name, app->key.pid); + ua_event->attr.name, app->pid); + + health_code_update(); + + /* Set filter if one is present. */ + if (ua_event->filter) { + ret = set_ust_event_filter(ua_event, app); + if (ret < 0) { + goto error; + } + } /* If event not enabled, disable it on the tracer */ if (ua_event->enabled == 0) { @@ -641,10 +1231,10 @@ int create_ust_event(struct ust_app *app, struct ust_app_session *ua_sess, * just created it. */ switch (ret) { - case -EPERM: + case -LTTNG_UST_ERR_PERM: /* Code flow problem */ assert(0); - case -EEXIST: + case -LTTNG_UST_ERR_EXIST: /* It's OK for our use case. 
*/ ret = 0; break; @@ -656,6 +1246,7 @@ int create_ust_event(struct ust_app *app, struct ust_app_session *ua_sess, } error: + health_code_update(); return ret; } @@ -665,10 +1256,6 @@ error: static void shadow_copy_event(struct ust_app_event *ua_event, struct ltt_ust_event *uevent) { - struct lttng_ht_iter iter; - struct ltt_ust_context *uctx; - struct ust_app_ctx *ua_ctx; - strncpy(ua_event->name, uevent->attr.name, sizeof(ua_event->name)); ua_event->name[sizeof(ua_event->name) - 1] = '\0'; @@ -677,16 +1264,10 @@ static void shadow_copy_event(struct ust_app_event *ua_event, /* Copy event attributes */ memcpy(&ua_event->attr, &uevent->attr, sizeof(ua_event->attr)); - cds_lfht_for_each_entry(uevent->ctx->ht, &iter.iter, uctx, node.node) { - ua_ctx = alloc_ust_app_ctx(&uctx->ctx); - if (ua_ctx == NULL) { - /* malloc() failed. We should simply stop */ - return; - } - - lttng_ht_node_init_ulong(&ua_ctx->node, - (unsigned long) ua_ctx->ctx.ctx); - lttng_ht_add_unique_ulong(ua_event->ctx, &ua_ctx->node); + /* Copy filter bytecode */ + if (uevent->filter) { + ua_event->filter = alloc_copy_ust_app_filter(uevent->filter); + /* Filter might be NULL here in case of ENONEM. */ } } @@ -697,7 +1278,6 @@ static void shadow_copy_channel(struct ust_app_channel *ua_chan, struct ltt_ust_channel *uchan) { struct lttng_ht_iter iter; - struct lttng_ht_node_str *ua_event_node; struct ltt_ust_event *uevent; struct ltt_ust_context *uctx; struct ust_app_event *ua_event; @@ -707,8 +1287,18 @@ static void shadow_copy_channel(struct ust_app_channel *ua_chan, strncpy(ua_chan->name, uchan->name, sizeof(ua_chan->name)); ua_chan->name[sizeof(ua_chan->name) - 1] = '\0'; - /* Copy event attributes */ - memcpy(&ua_chan->attr, &uchan->attr, sizeof(ua_chan->attr)); + + /* Copy event attributes since the layout is different. 
*/ + ua_chan->attr.subbuf_size = uchan->attr.subbuf_size; + ua_chan->attr.num_subbuf = uchan->attr.num_subbuf; + ua_chan->attr.overwrite = uchan->attr.overwrite; + ua_chan->attr.switch_timer_interval = uchan->attr.switch_timer_interval; + ua_chan->attr.read_timer_interval = uchan->attr.read_timer_interval; + ua_chan->attr.output = uchan->attr.output; + /* + * Note that the attribute channel type is not set since the channel on the + * tracing registry side does not have this information. + */ ua_chan->enabled = uchan->enabled; @@ -724,11 +1314,9 @@ static void shadow_copy_channel(struct ust_app_channel *ua_chan, /* Copy all events from ltt ust channel to ust app channel */ cds_lfht_for_each_entry(uchan->events->ht, &iter.iter, uevent, node.node) { - struct lttng_ht_iter uiter; - - lttng_ht_lookup(ua_chan->events, (void *) uevent->attr.name, &uiter); - ua_event_node = lttng_ht_iter_get_node_str(&uiter); - if (ua_event_node == NULL) { + ua_event = find_ust_app_event(ua_chan->events, uevent->attr.name, + uevent->filter, uevent->attr.loglevel); + if (ua_event == NULL) { DBG2("UST event %s not found on shadow copy channel", uevent->attr.name); ua_event = alloc_ust_app_event(uevent->attr.name, &uevent->attr); @@ -736,7 +1324,7 @@ static void shadow_copy_channel(struct ust_app_channel *ua_chan, continue; } shadow_copy_event(ua_event, uevent); - lttng_ht_add_unique_str(ua_chan->events, &ua_event->node); + add_unique_ust_app_event(ua_chan, ua_event); } } @@ -769,8 +1357,8 @@ static void shadow_copy_session(struct ust_app_session *ua_sess, ua_sess->uid = usess->uid; ua_sess->gid = usess->gid; - ret = snprintf(ua_sess->path, PATH_MAX, "%s/%s-%d-%s", usess->pathname, - app->name, app->key.pid, datetime); + ret = snprintf(ua_sess->path, PATH_MAX, "%s-%d-%s/", app->name, app->pid, + datetime); if (ret < 0) { PERROR("asprintf UST shadow copy session"); /* TODO: We cannot return an error from here.. 
*/ @@ -793,13 +1381,19 @@ static void shadow_copy_session(struct ust_app_session *ua_sess, DBG2("Channel %s not found on shadow session copy, creating it", uchan->name); - ua_chan = alloc_ust_app_channel(uchan->name, &uchan->attr); + ua_chan = alloc_ust_app_channel(uchan->name, ua_sess, &uchan->attr); if (ua_chan == NULL) { /* malloc failed FIXME: Might want to do handle ENOMEM .. */ continue; } - shadow_copy_channel(ua_chan, uchan); + /* + * The concept of metadata channel does not exist on the tracing + * registry side of the session daemon so this can only be a per CPU + * channel and not metadata. + */ + ua_chan->attr.type = LTTNG_UST_CHAN_PER_CPU; + lttng_ht_add_unique_str(ua_sess->channels, &ua_chan->node); } } @@ -838,35 +1432,62 @@ error: } /* - * Create a UST session onto the tracer of app and add it the session - * hashtable. + * Create a session on the tracer side for the given app. * - * Return ust app session or NULL on error. + * On success, ua_sess_ptr is populated with the session pointer or else left + * untouched. If the session was created, is_created is set to 1. On error, + * it's left untouched. Note that ua_sess_ptr is mandatory but is_created can + * be NULL. + * + * Returns 0 on success or else a negative code which is either -ENOMEM or + * -ENOTCONN which is the default code if the ustctl_create_session fails. 
*/ -static struct ust_app_session *create_ust_app_session( - struct ltt_ust_session *usess, struct ust_app *app) +static int create_ust_app_session(struct ltt_ust_session *usess, + struct ust_app *app, struct ust_app_session **ua_sess_ptr, + int *is_created) { - int ret; + int ret, created = 0; struct ust_app_session *ua_sess; + assert(usess); + assert(app); + assert(ua_sess_ptr); + + health_code_update(); + ua_sess = lookup_session_by_app(usess, app); if (ua_sess == NULL) { DBG2("UST app pid: %d session id %d not found, creating it", - app->key.pid, usess->id); - ua_sess = alloc_ust_app_session(); + app->pid, usess->id); + ua_sess = alloc_ust_app_session(app); if (ua_sess == NULL) { /* Only malloc can failed so something is really wrong */ - goto end; + ret = -ENOMEM; + goto error; } shadow_copy_session(ua_sess, usess, app); + created = 1; } + health_code_update(); + if (ua_sess->handle == -1) { - ret = ustctl_create_session(app->key.sock); + ret = ustctl_create_session(app->sock); if (ret < 0) { - ERR("Creating session for app pid %d", app->key.pid); - /* This means that the tracer is gone... */ - ua_sess = (void*) -1UL; + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("Creating session for app pid %d with ret %d", + app->pid, ret); + } else { + DBG("UST app creating session failed. Application is dead"); + } + delete_ust_app_session(-1, ua_sess, app); + if (ret != -ENOMEM) { + /* + * Tracer is probably gone or got an internal error so let's + * behave like it will soon unregister or not usable. + */ + ret = -ENOTCONN; + } goto error; } @@ -879,16 +1500,31 @@ static struct ust_app_session *create_ust_app_session( DBG2("UST app session created successfully with handle %d", ret); } -end: - return ua_sess; + /* + * Assign consumer if not already set. For one application, there is only + * one possible consumer has of now. 
+ */ + if (!ua_sess->consumer) { + ua_sess->consumer = usess->consumer; + } + + *ua_sess_ptr = ua_sess; + if (is_created) { + *is_created = created; + } + + /* Everything went well. */ + ret = 0; error: - delete_ust_app_session(-1, ua_sess); - return NULL; + health_code_update(); + return ret; } /* * Create a context for the channel on the tracer. + * + * Called with UST app session lock held. */ static int create_ust_app_channel_context(struct ust_app_session *ua_sess, @@ -928,49 +1564,10 @@ error: return ret; } -/* - * Create an UST context and enable it for the event on the tracer. - */ -static -int create_ust_app_event_context(struct ust_app_session *ua_sess, - struct ust_app_event *ua_event, struct lttng_ust_context *uctx, - struct ust_app *app) -{ - int ret = 0; - struct lttng_ht_iter iter; - struct lttng_ht_node_ulong *node; - struct ust_app_ctx *ua_ctx; - - DBG2("UST app adding context to event %s", ua_event->name); - - lttng_ht_lookup(ua_event->ctx, (void *)((unsigned long)uctx->ctx), &iter); - node = lttng_ht_iter_get_node_ulong(&iter); - if (node != NULL) { - ret = -EEXIST; - goto error; - } - - ua_ctx = alloc_ust_app_ctx(uctx); - if (ua_ctx == NULL) { - /* malloc failed */ - ret = -1; - goto error; - } - - lttng_ht_node_init_ulong(&ua_ctx->node, (unsigned long) ua_ctx->ctx.ctx); - lttng_ht_add_unique_ulong(ua_event->ctx, &ua_ctx->node); - - ret = create_ust_event_context(ua_event, ua_ctx, app); - if (ret < 0) { - goto error; - } - -error: - return ret; -} - /* * Enable on the tracer side a ust app event for the session and channel. + * + * Called with UST app session lock held. */ static int enable_ust_app_event(struct ust_app_session *ua_sess, @@ -1059,11 +1656,15 @@ error: } /* - * Create UST app channel and create it on the tracer. + * Create UST app channel and create it on the tracer. Set ua_chanp of the + * newly created channel if not NULL. + * + * Called with UST app session lock held. 
*/ -static struct ust_app_channel *create_ust_app_channel( - struct ust_app_session *ua_sess, struct ltt_ust_channel *uchan, - struct ust_app *app) +static int create_ust_app_channel(struct ust_app_session *ua_sess, + struct ltt_ust_channel *uchan, struct ust_app *app, + struct consumer_output *consumer, enum lttng_ust_chan_type type, + struct ust_app_channel **ua_chanp) { int ret = 0; struct lttng_ht_iter iter; @@ -1078,35 +1679,45 @@ static struct ust_app_channel *create_ust_app_channel( goto end; } - ua_chan = alloc_ust_app_channel(uchan->name, &uchan->attr); + ua_chan = alloc_ust_app_channel(uchan->name, ua_sess, &uchan->attr); if (ua_chan == NULL) { /* Only malloc can fail here */ + ret = -ENOMEM; goto error; } shadow_copy_channel(ua_chan, uchan); - ret = create_ust_channel(app, ua_sess, ua_chan); + /* Set channel type. */ + ua_chan->attr.type = type; + + ret = create_ust_channel(app, ua_sess, ua_chan, consumer); if (ret < 0) { - /* Not found previously means that it does not exist on the tracer */ - assert(ret != -EEXIST); goto error; } - lttng_ht_add_unique_str(ua_sess->channels, &ua_chan->node); - DBG2("UST app create channel %s for PID %d completed", ua_chan->name, - app->key.pid); + app->pid); + + /* Only add the channel if successful on the tracer side. */ + lttng_ht_add_unique_str(ua_sess->channels, &ua_chan->node); end: - return ua_chan; + if (ua_chanp) { + *ua_chanp = ua_chan; + } + + /* Everything went well. */ + return 0; error: - delete_ust_app_channel(-1, ua_chan); - return NULL; + delete_ust_app_channel(ua_chan->is_sent ? app->sock : -1, ua_chan, app); + return ret; } /* * Create UST app event and create it on the tracer side. + * + * Called with ust app session mutex held. 
*/ static int create_ust_app_event(struct ust_app_session *ua_sess, @@ -1114,14 +1725,12 @@ int create_ust_app_event(struct ust_app_session *ua_sess, struct ust_app *app) { int ret = 0; - struct lttng_ht_iter iter; - struct lttng_ht_node_str *ua_event_node; struct ust_app_event *ua_event; /* Get event node */ - lttng_ht_lookup(ua_chan->events, (void *)uevent->attr.name, &iter); - ua_event_node = lttng_ht_iter_get_node_str(&iter); - if (ua_event_node != NULL) { + ua_event = find_ust_app_event(ua_chan->events, uevent->attr.name, + uevent->filter, uevent->attr.loglevel); + if (ua_event != NULL) { ret = -EEXIST; goto end; } @@ -1139,14 +1748,14 @@ int create_ust_app_event(struct ust_app_session *ua_sess, ret = create_ust_event(app, ua_sess, ua_chan, ua_event); if (ret < 0) { /* Not found previously means that it does not exist on the tracer */ - assert(ret != -EEXIST); + assert(ret != -LTTNG_UST_ERR_EXIST); goto error; } - lttng_ht_add_unique_str(ua_chan->events, &ua_event->node); + add_unique_ust_app_event(ua_chan, ua_event); DBG2("UST app create event %s for PID %d completed", ua_event->name, - app->key.pid); + app->pid); end: return ret; @@ -1159,69 +1768,81 @@ error: /* * Create UST metadata and open it on the tracer side. + * + * Called with UST app session lock held. */ static int create_ust_app_metadata(struct ust_app_session *ua_sess, - char *pathname, struct ust_app *app) + struct ust_app *app, struct consumer_output *consumer) { int ret = 0; + struct ust_app_channel *metadata; + struct consumer_socket *socket; - if (ua_sess->metadata == NULL) { - /* Allocate UST metadata */ - ua_sess->metadata = trace_ust_create_metadata(pathname); - if (ua_sess->metadata == NULL) { - /* malloc() failed */ - goto error; - } - - ret = open_ust_metadata(app, ua_sess); - if (ret < 0) { - DBG3("Opening metadata failed. 
Cleaning up memory"); + assert(ua_sess); + assert(app); + assert(consumer); - /* Cleanup failed metadata struct */ - free(ua_sess->metadata); - /* - * This is very important because delete_ust_app_session check if - * the pointer is null or not in order to delete the metadata. - */ - ua_sess->metadata = NULL; - goto error; - } + if (ua_sess->metadata) { + /* Already exist. Return success. */ + goto end; + } - DBG2("UST metadata opened for app pid %d", app->key.pid); + /* Allocate UST metadata */ + metadata = alloc_ust_app_channel(DEFAULT_METADATA_NAME, ua_sess, NULL); + if (!metadata) { + /* malloc() failed */ + ret = -ENOMEM; + goto error; } - /* Open UST metadata stream */ - if (ua_sess->metadata->stream_obj == NULL) { - ret = create_ust_stream(app, ua_sess); - if (ret < 0) { - goto error; - } + /* Set default attributes for metadata. */ + metadata->attr.overwrite = DEFAULT_CHANNEL_OVERWRITE; + metadata->attr.subbuf_size = default_get_metadata_subbuf_size(); + metadata->attr.num_subbuf = DEFAULT_METADATA_SUBBUF_NUM; + metadata->attr.switch_timer_interval = DEFAULT_UST_CHANNEL_SWITCH_TIMER; + metadata->attr.read_timer_interval = DEFAULT_UST_CHANNEL_READ_TIMER; + metadata->attr.output = LTTNG_UST_MMAP; + metadata->attr.type = LTTNG_UST_CHAN_METADATA; - ret = run_as_mkdir(ua_sess->path, S_IRWXU | S_IRWXG, - ua_sess->uid, ua_sess->gid); - if (ret < 0) { - PERROR("mkdir UST metadata"); - goto error; - } + /* Get the right consumer socket for the application. */ + socket = find_consumer_socket_by_bitness(app->bits_per_long, consumer); + if (!socket) { + ret = -EINVAL; + goto error_consumer; + } - ret = snprintf(ua_sess->metadata->pathname, PATH_MAX, - "%s/metadata", ua_sess->path); - if (ret < 0) { - PERROR("asprintf UST create stream"); - goto error; - } + /* + * Ask the metadata channel creation to the consumer. The metadata object + * will be created by the consumer and kept their. 
However, the stream is + * never added or monitored until we do a first push metadata to the + * consumer. + */ + ret = ust_consumer_ask_channel(ua_sess, metadata, consumer, socket); + if (ret < 0) { + goto error_consumer; + } - DBG2("UST metadata stream object created for app pid %d", - app->key.pid); - } else { - ERR("Attempting to create stream without metadata opened"); - goto error; + /* + * The setup command will make the metadata stream be sent to the relayd, + * if applicable, and the thread managing the metadatas. This is important + * because after this point, if an error occurs, the only way the stream + * can be deleted is to be monitored in the consumer. + */ + ret = ust_consumer_setup_metadata(socket, metadata); + if (ret < 0) { + goto error_consumer; } - return 0; + ua_sess->metadata = metadata; + + DBG2("UST metadata created for app pid %d", app->pid); +end: + return 0; +error_consumer: + delete_ust_app_channel(-1, metadata, app); error: - return -1; + return ret; } /* @@ -1233,90 +1854,159 @@ struct lttng_ht *ust_app_get_ht(void) } /* - * Return ust app pointer or NULL if not found. + * Return ust app pointer or NULL if not found. RCU read side lock MUST be + * acquired before calling this function. */ struct ust_app *ust_app_find_by_pid(pid_t pid) { + struct ust_app *app = NULL; struct lttng_ht_node_ulong *node; struct lttng_ht_iter iter; - rcu_read_lock(); lttng_ht_lookup(ust_app_ht, (void *)((unsigned long) pid), &iter); node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { DBG2("UST app no found with pid %d", pid); goto error; } - rcu_read_unlock(); DBG2("Found UST app by pid %d", pid); - return caa_container_of(node, struct ust_app, node); + app = caa_container_of(node, struct ust_app, pid_n); error: - rcu_read_unlock(); - return NULL; + return app; } /* - * Using pid and uid (of the app), allocate a new ust_app struct and - * add it to the global traceable app list. 
+ * Allocate and init an UST app object using the registration information and + * the command socket. This is called when the command socket connects to the + * session daemon. * - * On success, return 0, else return malloc -ENOMEM, or -EINVAL if app - * bitness is not supported. + * The object is returned on success or else NULL. */ -int ust_app_register(struct ust_register_msg *msg, int sock) +struct ust_app *ust_app_create(struct ust_register_msg *msg, int sock) { - struct ust_app *lta; + struct ust_app *lta = NULL; - if ((msg->bits_per_long == 64 && ust_consumerd64_fd == -EINVAL) - || (msg->bits_per_long == 32 && ust_consumerd32_fd == -EINVAL)) { - ERR("Registration failed: application \"%s\" (pid: %d) has " - "%d-bit long, but no consumerd for this long size is available.\n", - msg->name, msg->pid, msg->bits_per_long); - close(sock); - return -EINVAL; - } - if (msg->major != LTTNG_UST_COMM_MAJOR) { + assert(msg); + assert(sock >= 0); + + DBG3("UST app creating application for socket %d", sock); + + if ((msg->bits_per_long == 64 && + (uatomic_read(&ust_consumerd64_fd) == -EINVAL)) + || (msg->bits_per_long == 32 && + (uatomic_read(&ust_consumerd32_fd) == -EINVAL))) { ERR("Registration failed: application \"%s\" (pid: %d) has " - "communication protocol version %u.%u, but sessiond supports 2.x.\n", - msg->name, msg->pid, msg->major, msg->minor); - close(sock); - return -EINVAL; + "%d-bit long, but no consumerd for this size is available.\n", + msg->name, msg->pid, msg->bits_per_long); + goto error; } + lta = zmalloc(sizeof(struct ust_app)); if (lta == NULL) { PERROR("malloc"); - return -ENOMEM; + goto error; } lta->ppid = msg->ppid; lta->uid = msg->uid; lta->gid = msg->gid; - lta->compatible = 0; /* Not compatible until proven */ + lta->bits_per_long = msg->bits_per_long; + lta->uint8_t_alignment = msg->uint8_t_alignment; + lta->uint16_t_alignment = msg->uint16_t_alignment; + lta->uint32_t_alignment = msg->uint32_t_alignment; + lta->uint64_t_alignment = 
msg->uint64_t_alignment; + lta->long_alignment = msg->long_alignment; + lta->byte_order = msg->byte_order; + lta->v_major = msg->major; lta->v_minor = msg->minor; - strncpy(lta->name, msg->name, sizeof(lta->name)); - lta->name[16] = '\0'; lta->sessions = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); + lta->ust_objd = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); + lta->notify_sock = -1; + + /* Copy name and make sure it's NULL terminated. */ + strncpy(lta->name, msg->name, sizeof(lta->name)); + lta->name[UST_APP_PROCNAME_LEN] = '\0'; + + /* + * Before this can be called, when receiving the registration information, + * the application compatibility is checked. So, at this point, the + * application can work with this session daemon. + */ + lta->compatible = 1; + + lta->pid = msg->pid; + lttng_ht_node_init_ulong(&lta->pid_n, (unsigned long) lta->pid); + lta->sock = sock; + lttng_ht_node_init_ulong(&lta->sock_n, (unsigned long) lta->sock); + + CDS_INIT_LIST_HEAD(&lta->teardown_head); - /* Set key map */ - lta->key.pid = msg->pid; - lttng_ht_node_init_ulong(&lta->node, (unsigned long)lta->key.pid); - lta->key.sock = sock; - lttng_ht_node_init_ulong(&lta->key.node, (unsigned long)lta->key.sock); +error: + return lta; +} + +/* + * For a given application object, add it to every hash table. + */ +void ust_app_add(struct ust_app *app) +{ + assert(app); + assert(app->notify_sock >= 0); rcu_read_lock(); - lttng_ht_add_unique_ulong(ust_app_sock_key_map, &lta->key.node); - lttng_ht_add_unique_ulong(ust_app_ht, &lta->node); + + /* + * On a re-registration, we want to kick out the previous registration of + * that pid + */ + lttng_ht_add_replace_ulong(ust_app_ht, &app->pid_n); + + /* + * The socket _should_ be unique until _we_ call close. So, a add_unique + * for the ust_app_ht_by_sock is used which asserts fail if the entry was + * already in the table. + */ + lttng_ht_add_unique_ulong(ust_app_ht_by_sock, &app->sock_n); + + /* Add application to the notify socket hash table.
*/ + lttng_ht_node_init_ulong(&app->notify_sock_n, app->notify_sock); + lttng_ht_add_unique_ulong(ust_app_ht_by_notify_sock, &app->notify_sock_n); + + DBG("App registered with pid:%d ppid:%d uid:%d gid:%d sock:%d name:%s " + "notify_sock:%d (version %d.%d)", app->pid, app->ppid, app->uid, + app->gid, app->sock, app->name, app->notify_sock, app->v_major, + app->v_minor); + rcu_read_unlock(); +} - DBG("App registered with pid:%d ppid:%d uid:%d gid:%d sock:%d name:%s" - " (version %d.%d)", lta->key.pid, lta->ppid, lta->uid, lta->gid, - lta->key.sock, lta->name, lta->v_major, lta->v_minor); +/* + * Set the application version into the object. + * + * Return 0 on success else a negative value either an errno code or a + * LTTng-UST error code. + */ +int ust_app_version(struct ust_app *app) +{ + int ret; - return 0; + assert(app); + + ret = ustctl_tracer_version(app->sock, &app->version); + if (ret < 0) { + if (ret != -LTTNG_UST_ERR_EXITING && ret != -EPIPE) { + ERR("UST app %d verson failed with ret %d", app->sock, ret); + } else { + DBG3("UST app %d verion failed. 
Application is dead", app->sock); + } + } + + return ret; } /* @@ -1330,35 +2020,80 @@ void ust_app_unregister(int sock) struct ust_app *lta; struct lttng_ht_node_ulong *node; struct lttng_ht_iter iter; + struct ust_app_session *ua_sess; int ret; rcu_read_lock(); - lta = find_app_by_sock(sock); - if (lta == NULL) { - ERR("Unregister app sock %d not found!", sock); - goto error; - } - - DBG("PID %d unregistering with sock %d", lta->key.pid, sock); - - /* Remove application from socket hash table */ - lttng_ht_lookup(ust_app_sock_key_map, (void *)((unsigned long) sock), &iter); - ret = lttng_ht_del(ust_app_sock_key_map, &iter); - assert(!ret); /* Get the node reference for a call_rcu */ - lttng_ht_lookup(ust_app_ht, (void *)((unsigned long) lta->key.pid), &iter); + lttng_ht_lookup(ust_app_ht_by_sock, (void *)((unsigned long) sock), &iter); node = lttng_ht_iter_get_node_ulong(&iter); - if (node == NULL) { - ERR("Unable to find app sock %d by pid %d", sock, lta->key.pid); - goto error; - } + assert(node); + + lta = caa_container_of(node, struct ust_app, sock_n); + DBG("PID %d unregistering with sock %d", lta->pid, sock); /* Remove application from PID hash table */ - ret = lttng_ht_del(ust_app_ht, &iter); + ret = lttng_ht_del(ust_app_ht_by_sock, &iter); assert(!ret); - call_rcu(&node->head, delete_ust_app_rcu); -error: + + /* + * Remove application from notify hash table. The thread handling the + * notify socket could have deleted the node so ignore on error because + * either way it's valid. The close of that socket is handled by the other + * thread. + */ + iter.iter.node = &lta->notify_sock_n.node; + (void) lttng_ht_del(ust_app_ht_by_notify_sock, &iter); + + /* + * Ignore return value since the node might have been removed before by an + * add replace during app registration because the PID can be reassigned by + * the OS. + */ + iter.iter.node = &lta->pid_n.node; + ret = lttng_ht_del(ust_app_ht, &iter); + if (ret) { + DBG3("Unregister app by PID %d failed.
This can happen on pid reuse", + lta->pid); + } + + /* Remove sessions so they are not visible during deletion.*/ + cds_lfht_for_each_entry(lta->sessions->ht, &iter.iter, ua_sess, + node.node) { + ret = lttng_ht_del(lta->sessions, &iter); + if (ret) { + /* The session was already removed so scheduled for teardown. */ + continue; + } + + /* + * Add session to list for teardown. This is safe since at this point we + * are the only one using this list. + */ + pthread_mutex_lock(&ua_sess->lock); + + /* + * Normally, this is done in the delete session process which is + * executed in the call rcu below. However, upon registration we can't + * afford to wait for the grace period before pushing data or else the + * data pending feature can race between the unregistration and stop + * command where the data pending command is sent *before* the grace + * period ended. + * + * The close metadata below nullifies the metadata pointer in the + * session so the delete session will NOT push/close a second time. 
+ */ + (void) push_metadata(lta, ua_sess); + (void) close_metadata(lta, ua_sess); + + cds_list_add(&ua_sess->teardown_node, &lta->teardown_head); + pthread_mutex_unlock(&ua_sess->lock); + } + + /* Free memory */ + call_rcu(&lta->pid_n.head, delete_ust_app_rcu); + rcu_read_unlock(); return; } @@ -1386,11 +2121,11 @@ int ust_app_list_events(struct lttng_event **events) size_t nbmem, count = 0; struct lttng_ht_iter iter; struct ust_app *app; - struct lttng_event *tmp; + struct lttng_event *tmp_event; nbmem = UST_APP_EVENT_LIST_SIZE; - tmp = zmalloc(nbmem * sizeof(struct lttng_event)); - if (tmp == NULL) { + tmp_event = zmalloc(nbmem * sizeof(struct lttng_event)); + if (tmp_event == NULL) { PERROR("zmalloc ust app events"); ret = -ENOMEM; goto error; @@ -1398,9 +2133,11 @@ int ust_app_list_events(struct lttng_event **events) rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { struct lttng_ust_tracepoint_iter uiter; + health_code_update(); + if (!app->compatible) { /* * TODO: In time, we should notice the caller of this error by @@ -1408,43 +2145,163 @@ int ust_app_list_events(struct lttng_event **events) */ continue; } - handle = ustctl_tracepoint_list(app->key.sock); + handle = ustctl_tracepoint_list(app->sock); if (handle < 0) { - ERR("UST app list events getting handle failed for app pid %d", - app->key.pid); + if (handle != -EPIPE && handle != -LTTNG_UST_ERR_EXITING) { + ERR("UST app list events getting handle failed for app pid %d", + app->pid); + } continue; } - while ((ret = ustctl_tracepoint_list_get(app->key.sock, handle, - &uiter)) != -ENOENT) { + while ((ret = ustctl_tracepoint_list_get(app->sock, handle, + &uiter)) != -LTTNG_UST_ERR_NOENT) { + /* Handle ustctl error.
*/ + if (ret < 0) { + free(tmp_event); + if (ret != -LTTNG_UST_ERR_EXITING || ret != -EPIPE) { + ERR("UST app tp list get failed for app %d with ret %d", + app->sock, ret); + } else { + DBG3("UST app tp list get failed. Application is dead"); + } + goto rcu_error; + } + + health_code_update(); if (count >= nbmem) { + /* In case the realloc fails, we free the memory */ + void *ptr; + DBG2("Reallocating event list from %zu to %zu entries", nbmem, 2 * nbmem); nbmem *= 2; - tmp = realloc(tmp, nbmem * sizeof(struct lttng_event)); - if (tmp == NULL) { + ptr = realloc(tmp_event, nbmem * sizeof(struct lttng_event)); + if (ptr == NULL) { PERROR("realloc ust app events"); + free(tmp_event); ret = -ENOMEM; goto rcu_error; } + tmp_event = ptr; } - memcpy(tmp[count].name, uiter.name, LTTNG_UST_SYM_NAME_LEN); - tmp[count].loglevel = uiter.loglevel; - tmp[count].type = LTTNG_UST_TRACEPOINT; - tmp[count].pid = app->key.pid; - tmp[count].enabled = -1; + memcpy(tmp_event[count].name, uiter.name, LTTNG_UST_SYM_NAME_LEN); + tmp_event[count].loglevel = uiter.loglevel; + tmp_event[count].type = (enum lttng_event_type) LTTNG_UST_TRACEPOINT; + tmp_event[count].pid = app->pid; + tmp_event[count].enabled = -1; count++; } } ret = count; - *events = tmp; + *events = tmp_event; DBG2("UST app list events done (%zu events)", count); rcu_error: rcu_read_unlock(); error: + health_code_update(); + return ret; +} + +/* + * Fill events array with all events name of all registered apps. 
+ */ +int ust_app_list_event_fields(struct lttng_event_field **fields) +{ + int ret, handle; + size_t nbmem, count = 0; + struct lttng_ht_iter iter; + struct ust_app *app; + struct lttng_event_field *tmp_event; + + nbmem = UST_APP_EVENT_LIST_SIZE; + tmp_event = zmalloc(nbmem * sizeof(struct lttng_event_field)); + if (tmp_event == NULL) { + PERROR("zmalloc ust app event fields"); + ret = -ENOMEM; + goto error; + } + + rcu_read_lock(); + + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { + struct lttng_ust_field_iter uiter; + + health_code_update(); + + if (!app->compatible) { + /* + * TODO: In time, we should notice the caller of this error by + * telling him that this is a version error. + */ + continue; + } + handle = ustctl_tracepoint_field_list(app->sock); + if (handle < 0) { + if (handle != -EPIPE && handle != -LTTNG_UST_ERR_EXITING) { + ERR("UST app list field getting handle failed for app pid %d", + app->pid); + } + continue; + } + + while ((ret = ustctl_tracepoint_field_list_get(app->sock, handle, + &uiter)) != -LTTNG_UST_ERR_NOENT) { + /* Handle ustctl error. */ + if (ret < 0) { + free(tmp_event); + if (ret != -LTTNG_UST_ERR_EXITING || ret != -EPIPE) { + ERR("UST app tp list field failed for app %d with ret %d", + app->sock, ret); + } else { + DBG3("UST app tp list field failed. 
Application is dead"); + } + goto rcu_error; + } + + health_code_update(); + if (count >= nbmem) { + /* In case the realloc fails, we free the memory */ + void *ptr; + + DBG2("Reallocating event field list from %zu to %zu entries", nbmem, + 2 * nbmem); + nbmem *= 2; + ptr = realloc(tmp_event, nbmem * sizeof(struct lttng_event_field)); + if (ptr == NULL) { + PERROR("realloc ust app event fields"); + free(tmp_event); + ret = -ENOMEM; + goto rcu_error; + } + tmp_event = ptr; + } + + memcpy(tmp_event[count].field_name, uiter.field_name, LTTNG_UST_SYM_NAME_LEN); + tmp_event[count].type = uiter.type; + tmp_event[count].nowrite = uiter.nowrite; + + memcpy(tmp_event[count].event.name, uiter.event_name, LTTNG_UST_SYM_NAME_LEN); + tmp_event[count].event.loglevel = uiter.loglevel; + tmp_event[count].event.type = LTTNG_UST_TRACEPOINT; + tmp_event[count].event.pid = app->pid; + tmp_event[count].event.enabled = -1; + count++; + } + } + + ret = count; + *fields = tmp_event; + + DBG2("UST app list event fields done (%zu events)", count); + +rcu_error: + rcu_read_unlock(); +error: + health_code_update(); return ret; } @@ -1454,27 +2311,37 @@ error: void ust_app_clean_list(void) { int ret; + struct ust_app *app; struct lttng_ht_iter iter; - struct lttng_ht_node_ulong *node; DBG2("UST app cleaning registered apps hash table"); rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, node, node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { ret = lttng_ht_del(ust_app_ht, &iter); assert(!ret); - call_rcu(&node->head, delete_ust_app_rcu); + call_rcu(&app->pid_n.head, delete_ust_app_rcu); + } + + /* Cleanup socket hash table */ + cds_lfht_for_each_entry(ust_app_ht_by_sock->ht, &iter.iter, app, + sock_n.node) { + ret = lttng_ht_del(ust_app_ht_by_sock, &iter); + assert(!ret); } - /* Destroy is done only when the ht is empty */ - lttng_ht_destroy(ust_app_ht); - cds_lfht_for_each_entry(ust_app_sock_key_map->ht, &iter.iter, node, node) { - ret = 
lttng_ht_del(ust_app_sock_key_map, &iter); + /* Cleanup notify socket hash table */ + cds_lfht_for_each_entry(ust_app_ht_by_notify_sock->ht, &iter.iter, app, + notify_sock_n.node) { + ret = lttng_ht_del(ust_app_ht_by_notify_sock, &iter); assert(!ret); } + /* Destroy is done only when the ht is empty */ - lttng_ht_destroy(ust_app_sock_key_map); + lttng_ht_destroy(ust_app_ht); + lttng_ht_destroy(ust_app_ht_by_sock); + lttng_ht_destroy(ust_app_ht_by_notify_sock); rcu_read_unlock(); } @@ -1485,7 +2352,8 @@ void ust_app_clean_list(void) void ust_app_ht_alloc(void) { ust_app_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); - ust_app_sock_key_map = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); + ust_app_ht_by_sock = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); + ust_app_ht_by_notify_sock = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); } /* @@ -1513,7 +2381,7 @@ int ust_app_disable_channel_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For every registered applications */ - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { struct lttng_ht_iter uiter; if (!app->compatible) { /* @@ -1574,7 +2442,7 @@ int ust_app_enable_channel_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For every registered applications */ - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { if (!app->compatible) { /* * TODO: In time, we should notice the caller of this error by @@ -1621,7 +2489,7 @@ int ust_app_disable_event_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For all registered applications */ - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { if (!app->compatible) { /* * TODO: In time, we should notice the caller of this error by @@ -1640,7 +2508,7 @@ int ust_app_disable_event_glb(struct ltt_ust_session *usess, 
ua_chan_node = lttng_ht_iter_get_node_str(&uiter); if (ua_chan_node == NULL) { DBG2("Channel %s not found in session id %d for app pid %d." - "Skipping", uchan->name, usess->id, app->key.pid); + "Skipping", uchan->name, usess->id, app->pid); continue; } ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); @@ -1649,7 +2517,7 @@ int ust_app_disable_event_glb(struct ltt_ust_session *usess, ua_event_node = lttng_ht_iter_get_node_str(&uiter); if (ua_event_node == NULL) { DBG2("Event %s not found in channel %s for app pid %d." - "Skipping", uevent->attr.name, uchan->name, app->key.pid); + "Skipping", uevent->attr.name, uchan->name, app->pid); continue; } ua_event = caa_container_of(ua_event_node, struct ust_app_event, node); @@ -1687,7 +2555,7 @@ int ust_app_disable_all_event_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For all registered applications */ - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { if (!app->compatible) { /* * TODO: In time, we should notice the caller of this error by @@ -1696,8 +2564,10 @@ int ust_app_disable_all_event_glb(struct ltt_ust_session *usess, continue; } ua_sess = lookup_session_by_app(usess, app); - /* If ua_sess is NULL, there is a code flow error */ - assert(ua_sess); + if (!ua_sess) { + /* The application has problem or is probably dead. 
*/ + continue; + } /* Lookup channel in the ust app session */ lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); @@ -1729,10 +2599,10 @@ int ust_app_disable_all_event_glb(struct ltt_ust_session *usess, int ust_app_create_channel_glb(struct ltt_ust_session *usess, struct ltt_ust_channel *uchan) { + int ret = 0, created; struct lttng_ht_iter iter; struct ust_app *app; - struct ust_app_session *ua_sess; - struct ust_app_channel *ua_chan; + struct ust_app_session *ua_sess = NULL; /* Very wrong code flow */ assert(usess); @@ -1744,7 +2614,7 @@ int ust_app_create_channel_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For every registered applications */ - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { if (!app->compatible) { /* * TODO: In time, we should notice the caller of this error by @@ -1757,29 +2627,43 @@ int ust_app_create_channel_glb(struct ltt_ust_session *usess, * that if session exist, it will simply return a pointer to the ust * app session. */ - ua_sess = create_ust_app_session(usess, app); - if (ua_sess == NULL) { - /* The malloc() failed. */ - goto error; - } else if (ua_sess == (void *) -1UL) { - /* The application's socket is not valid. Contiuing */ - continue; + ret = create_ust_app_session(usess, app, &ua_sess, &created); + if (ret < 0) { + switch (ret) { + case -ENOTCONN: + /* + * The application's socket is not valid. Either a bad socket + * or a timeout on it. We can't inform the caller that for a + * specific app, the session failed so lets continue here. + */ + continue; + case -ENOMEM: + default: + goto error_rcu_unlock; + } } + assert(ua_sess); - /* Create channel onto application */ - ua_chan = create_ust_app_channel(ua_sess, uchan, app); - if (ua_chan == NULL) { - /* Major problem here and it's maybe the tracer or malloc() */ - goto error; + pthread_mutex_lock(&ua_sess->lock); + /* Create channel onto application. 
We don't need the chan ref. */ + ret = create_ust_app_channel(ua_sess, uchan, app, usess->consumer, + LTTNG_UST_CHAN_PER_CPU, NULL); + pthread_mutex_unlock(&ua_sess->lock); + if (ret < 0) { + if (ret == -ENOMEM) { + /* No more memory is a fatal error. Stop right now. */ + goto error_rcu_unlock; + } + /* Cleanup the created session if it's the case. */ + if (created) { + destroy_app_session(app, ua_sess); + } } } +error_rcu_unlock: rcu_read_unlock(); - - return 0; - -error: - return -1; + return ret; } /* @@ -1790,7 +2674,7 @@ int ust_app_enable_event_glb(struct ltt_ust_session *usess, { int ret = 0; struct lttng_ht_iter iter, uiter; - struct lttng_ht_node_str *ua_chan_node, *ua_event_node; + struct lttng_ht_node_str *ua_chan_node; struct ust_app *app; struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; @@ -1808,7 +2692,7 @@ int ust_app_enable_event_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For all registered applications */ - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { if (!app->compatible) { /* * TODO: In time, we should notice the caller of this error by @@ -1817,8 +2701,12 @@ int ust_app_enable_event_glb(struct ltt_ust_session *usess, continue; } ua_sess = lookup_session_by_app(usess, app); - /* If ua_sess is NULL, there is a code flow error */ - assert(ua_sess); + if (!ua_sess) { + /* The application has problem or is probably dead. 
*/ + continue; + } + + pthread_mutex_lock(&ua_sess->lock); /* Lookup channel in the ust app session */ lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); @@ -1828,19 +2716,22 @@ int ust_app_enable_event_glb(struct ltt_ust_session *usess, ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); - lttng_ht_lookup(ua_chan->events, (void*)uevent->attr.name, &uiter); - ua_event_node = lttng_ht_iter_get_node_str(&uiter); - if (ua_event_node == NULL) { + /* Get event node */ + ua_event = find_ust_app_event(ua_chan->events, uevent->attr.name, + uevent->filter, uevent->attr.loglevel); + if (ua_event == NULL) { DBG3("UST app enable event %s not found for app PID %d." - "Skipping app", uevent->attr.name, app->key.pid); - continue; + "Skipping app", uevent->attr.name, app->pid); + goto next_app; } - ua_event = caa_container_of(ua_event_node, struct ust_app_event, node); ret = enable_ust_app_event(ua_sess, ua_event, app); if (ret < 0) { + pthread_mutex_unlock(&ua_sess->lock); goto error; } + next_app: + pthread_mutex_unlock(&ua_sess->lock); } error: @@ -1868,7 +2759,7 @@ int ust_app_create_event_glb(struct ltt_ust_session *usess, rcu_read_lock(); /* For all registered applications */ - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { if (!app->compatible) { /* * TODO: In time, we should notice the caller of this error by @@ -1877,9 +2768,12 @@ int ust_app_create_event_glb(struct ltt_ust_session *usess, continue; } ua_sess = lookup_session_by_app(usess, app); - /* If ua_sess is NULL, there is a code flow error */ - assert(ua_sess); + if (!ua_sess) { + /* The application has problem or is probably dead. 
*/ + continue; + } + pthread_mutex_lock(&ua_sess->lock); /* Lookup channel in the ust app session */ lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); ua_chan_node = lttng_ht_iter_get_node_str(&uiter); @@ -1889,13 +2783,14 @@ int ust_app_create_event_glb(struct ltt_ust_session *usess, ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); ret = create_ust_app_event(ua_sess, ua_chan, uevent, app); + pthread_mutex_unlock(&ua_sess->lock); if (ret < 0) { - if (ret != -EEXIST) { + if (ret != -LTTNG_UST_ERR_EXIST) { /* Possible value at this point: -ENOMEM. If so, we stop! */ break; } DBG2("UST app event %s already exist on app PID %d", - uevent->attr.name, app->key.pid); + uevent->attr.name, app->pid); continue; } } @@ -1911,13 +2806,9 @@ int ust_app_create_event_glb(struct ltt_ust_session *usess, int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app) { int ret = 0; - struct lttng_ht_iter iter; struct ust_app_session *ua_sess; - struct ust_app_channel *ua_chan; - struct ltt_ust_stream *ustream; - int consumerd_fd; - DBG("Starting tracing for ust app pid %d", app->key.pid); + DBG("Starting tracing for ust app pid %d", app->pid); rcu_read_lock(); @@ -1927,89 +2818,74 @@ int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app) ua_sess = lookup_session_by_app(usess, app); if (ua_sess == NULL) { - goto error_rcu_unlock; + /* The session is in teardown process. Ignore and continue. 
*/ + goto end; } + pthread_mutex_lock(&ua_sess->lock); + /* Upon restart, we skip the setup, already done */ if (ua_sess->started) { goto skip_setup; } - ret = create_ust_app_metadata(ua_sess, usess->pathname, app); - if (ret < 0) { - goto error_rcu_unlock; - } - - /* For each channel */ - cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan, - node.node) { - /* Create all streams */ - while (1) { - /* Create UST stream */ - ustream = zmalloc(sizeof(*ustream)); - if (ustream == NULL) { - PERROR("zmalloc ust stream"); - goto error_rcu_unlock; - } - - ret = ustctl_create_stream(app->key.sock, ua_chan->obj, - &ustream->obj); - if (ret < 0) { - /* Got all streams */ - break; - } - ustream->handle = ustream->obj->handle; - - /* Order is important */ - cds_list_add_tail(&ustream->list, &ua_chan->streams.head); - ret = snprintf(ustream->pathname, PATH_MAX, "%s/%s_%u", - ua_sess->path, ua_chan->name, - ua_chan->streams.count++); - if (ret < 0) { - PERROR("asprintf UST create stream"); - continue; + /* Create directories if consumer is LOCAL and has a path defined. */ + if (usess->consumer->type == CONSUMER_DST_LOCAL && + strlen(usess->consumer->dst.trace_path) > 0) { + ret = run_as_mkdir_recursive(usess->consumer->dst.trace_path, + S_IRWXU | S_IRWXG, usess->uid, usess->gid); + if (ret < 0) { + if (ret != -EEXIST) { + ERR("Trace directory creation error"); + goto error_unlock; } - DBG2("UST stream %d ready at %s", ua_chan->streams.count, - ustream->pathname); } } - switch (app->bits_per_long) { - case 64: - consumerd_fd = ust_consumerd64_fd; - break; - case 32: - consumerd_fd = ust_consumerd32_fd; - break; - default: - ret = -EINVAL; - goto error_rcu_unlock; - } - - /* Setup UST consumer socket and send fds to it */ - ret = ust_consumer_send_session(consumerd_fd, ua_sess); + /* Create the metadata for the application. 
*/ + ret = create_ust_app_metadata(ua_sess, app, usess->consumer); if (ret < 0) { - goto error_rcu_unlock; + goto error_unlock; } - ua_sess->started = 1; + + health_code_update(); skip_setup: /* This start the UST tracing */ - ret = ustctl_start_session(app->key.sock, ua_sess->handle); + ret = ustctl_start_session(app->sock, ua_sess->handle); if (ret < 0) { - ERR("Error starting tracing for app pid: %d", app->key.pid); - goto error_rcu_unlock; + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("Error starting tracing for app pid: %d (ret: %d)", + app->pid, ret); + } else { + DBG("UST app start session failed. Application is dead."); + } + goto error_unlock; } + /* Indicate that the session has been started once */ + ua_sess->started = 1; + + pthread_mutex_unlock(&ua_sess->lock); + + health_code_update(); + /* Quiescent wait after starting trace */ - ustctl_wait_quiescent(app->key.sock); + ret = ustctl_wait_quiescent(app->sock); + if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app wait quiescent failed for app pid %d ret %d", + app->pid, ret); + } end: rcu_read_unlock(); + health_code_update(); return 0; -error_rcu_unlock: +error_unlock: + pthread_mutex_unlock(&ua_sess->lock); rcu_read_unlock(); + health_code_update(); return -1; } @@ -2023,77 +2899,108 @@ int ust_app_stop_trace(struct ltt_ust_session *usess, struct ust_app *app) struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; - DBG("Stopping tracing for ust app pid %d", app->key.pid); + DBG("Stopping tracing for ust app pid %d", app->pid); rcu_read_lock(); if (!app->compatible) { - goto end; + goto end_no_session; } ua_sess = lookup_session_by_app(usess, app); if (ua_sess == NULL) { - /* Only malloc can failed so something is really wrong */ - goto error_rcu_unlock; + goto end_no_session; } - /* Not started, continuing. 
*/ - if (ua_sess->started == 0) { - goto end; + pthread_mutex_lock(&ua_sess->lock); + + /* + * If started = 0, it means that stop trace has been called for a session + * that was never started. It's possible since we can have a fail start + * from either the application manager thread or the command thread. Simply + * indicate that this is a stop error. + */ + if (!ua_sess->started) { + goto error_rcu_unlock; } + health_code_update(); + /* This inhibits UST tracing */ - ret = ustctl_stop_session(app->key.sock, ua_sess->handle); + ret = ustctl_stop_session(app->sock, ua_sess->handle); if (ret < 0) { - ERR("Error stopping tracing for app pid: %d", app->key.pid); + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("Error stopping tracing for app pid: %d (ret: %d)", + app->pid, ret); + } else { + DBG("UST app stop session failed. Application is dead."); + } goto error_rcu_unlock; } + health_code_update(); + /* Quiescent wait after stopping trace */ - ustctl_wait_quiescent(app->key.sock); + ret = ustctl_wait_quiescent(app->sock); + if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app wait quiescent failed for app pid %d ret %d", + app->pid, ret); + } + + health_code_update(); /* Flushing buffers */ cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan, node.node) { - ret = ustctl_sock_flush_buffer(app->key.sock, ua_chan->obj); + health_code_update(); + assert(ua_chan->is_sent); + ret = ustctl_sock_flush_buffer(app->sock, ua_chan->obj); if (ret < 0) { - ERR("UST app PID %d channel %s flush failed with ret %d", - app->key.pid, ua_chan->name, ret); + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app PID %d channel %s flush failed with ret %d", + app->pid, ua_chan->name, ret); + } else { + DBG3("UST app failed to flush %s. Application is dead.", + ua_chan->name); + /* No need to continue. 
*/ + break; + } /* Continuing flushing all buffers */ continue; } } - /* Flush all buffers before stopping */ - ret = ustctl_sock_flush_buffer(app->key.sock, ua_sess->metadata->obj); + health_code_update(); + + ret = push_metadata(app, ua_sess); if (ret < 0) { - ERR("UST app PID %d metadata flush failed with ret %d", app->key.pid, - ret); + goto error_rcu_unlock; } - ua_sess->started = 0; - -end: + pthread_mutex_unlock(&ua_sess->lock); +end_no_session: rcu_read_unlock(); + health_code_update(); return 0; error_rcu_unlock: + pthread_mutex_unlock(&ua_sess->lock); rcu_read_unlock(); + health_code_update(); return -1; } /* * Destroy a specific UST session in apps. */ -int ust_app_destroy_trace(struct ltt_ust_session *usess, struct ust_app *app) +static int destroy_trace(struct ltt_ust_session *usess, struct ust_app *app) { + int ret; struct ust_app_session *ua_sess; - struct lttng_ust_object_data obj; struct lttng_ht_iter iter; struct lttng_ht_node_ulong *node; - int ret; - DBG("Destroy tracing for ust app pid %d", app->key.pid); + DBG("Destroy tracing for ust app pid %d", app->pid); rcu_read_lock(); @@ -2104,30 +3011,26 @@ int ust_app_destroy_trace(struct ltt_ust_session *usess, struct ust_app *app) __lookup_session_by_app(usess, app, &iter); node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { - /* Only malloc can failed so something is really wrong */ - goto error_rcu_unlock; + /* Session is being or is deleted. 
*/ + goto end; } ua_sess = caa_container_of(node, struct ust_app_session, node); - ret = lttng_ht_del(app->sessions, &iter); - assert(!ret); - obj.handle = ua_sess->handle; - obj.shm_fd = -1; - obj.wait_fd = -1; - obj.memory_map_size = 0; - ustctl_release_object(app->key.sock, &obj); - delete_ust_app_session(app->key.sock, ua_sess); + health_code_update(); + destroy_app_session(app, ua_sess); - /* Quiescent wait after stopping trace */ - ustctl_wait_quiescent(app->key.sock); + health_code_update(); + /* Quiescent wait after stopping trace */ + ret = ustctl_wait_quiescent(app->sock); + if (ret < 0 && ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app wait quiescent failed for app pid %d ret %d", + app->pid, ret); + } end: rcu_read_unlock(); + health_code_update(); return 0; - -error_rcu_unlock: - rcu_read_unlock(); - return -1; } /* @@ -2143,7 +3046,7 @@ int ust_app_start_trace_all(struct ltt_ust_session *usess) rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { ret = ust_app_start_trace(usess, app); if (ret < 0) { /* Continue to next apps even on error */ @@ -2169,9 +3072,10 @@ int ust_app_stop_trace_all(struct ltt_ust_session *usess) rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { ret = ust_app_stop_trace(usess, app); if (ret < 0) { + ERR("UST app stop trace failed with ret %d", ret); /* Continue to next apps even on error */ continue; } @@ -2195,8 +3099,8 @@ int ust_app_destroy_trace_all(struct ltt_ust_session *usess) rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { - ret = ust_app_destroy_trace(usess, app); + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { + ret = destroy_trace(usess, app); if (ret < 0) { /* Continue to next apps even on error */ continue; @@ 
-2216,15 +3120,13 @@ void ust_app_global_update(struct ltt_ust_session *usess, int sock) int ret = 0; struct lttng_ht_iter iter, uiter, iter_ctx; struct ust_app *app; - struct ust_app_session *ua_sess; + struct ust_app_session *ua_sess = NULL; struct ust_app_channel *ua_chan; struct ust_app_event *ua_event; struct ust_app_ctx *ua_ctx; - if (usess == NULL) { - ERR("No UST session on global update. Returning"); - goto error; - } + assert(usess); + assert(sock >= 0); DBG2("UST app global update for app sock %d for session id %d", sock, usess->id); @@ -2233,7 +3135,11 @@ void ust_app_global_update(struct ltt_ust_session *usess, int sock) app = find_app_by_sock(sock); if (app == NULL) { - ERR("Failed to update app sock %d", sock); + /* + * Application can be unregistered before so this is possible hence + * simply stopping the update. + */ + DBG3("UST app update failed to find app sock %d", sock); goto error; } @@ -2241,30 +3147,37 @@ void ust_app_global_update(struct ltt_ust_session *usess, int sock) goto error; } - ua_sess = create_ust_app_session(usess, app); - if (ua_sess == NULL) { + ret = create_ust_app_session(usess, app, &ua_sess, NULL); + if (ret < 0) { + /* Tracer is probably gone or ENOMEM. */ goto error; } + assert(ua_sess); + + pthread_mutex_lock(&ua_sess->lock); /* - * We can iterate safely here over all UST app session sicne the create ust + * We can iterate safely here over all UST app session since the create ust * app session above made a shadow copy of the UST global domain from the * ltt ust session. */ cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan, node.node) { - ret = create_ust_channel(app, ua_sess, ua_chan); + ret = create_ust_channel(app, ua_sess, ua_chan, usess->consumer); if (ret < 0) { - /* FIXME: Should we quit here or continue... */ - continue; + /* + * Stop everything. On error, the application failed, no more file + * descriptor are available or ENOMEM so stopping here is the only + * thing we can do for now. 
+ */ + goto error_unlock; } cds_lfht_for_each_entry(ua_chan->ctx->ht, &iter_ctx.iter, ua_ctx, node.node) { ret = create_ust_channel_context(ua_chan, ua_ctx, app); if (ret < 0) { - /* FIXME: Should we quit here or continue... */ - continue; + goto error_unlock; } } @@ -2274,32 +3187,32 @@ void ust_app_global_update(struct ltt_ust_session *usess, int sock) node.node) { ret = create_ust_event(app, ua_sess, ua_chan, ua_event); if (ret < 0) { - /* FIXME: Should we quit here or continue... */ - continue; - } - - /* Add context on events. */ - cds_lfht_for_each_entry(ua_event->ctx->ht, &iter_ctx.iter, - ua_ctx, node.node) { - ret = create_ust_event_context(ua_event, ua_ctx, app); - if (ret < 0) { - /* FIXME: Should we quit here or continue... */ - continue; - } + goto error_unlock; } } } + pthread_mutex_unlock(&ua_sess->lock); + if (usess->start_trace) { ret = ust_app_start_trace(usess, app); if (ret < 0) { goto error; } - DBG2("UST trace started for app pid %d", app->key.pid); + DBG2("UST trace started for app pid %d", app->pid); } + /* Everything went well at this point. 
*/ + rcu_read_unlock(); + return; + +error_unlock: + pthread_mutex_unlock(&ua_sess->lock); error: + if (ua_sess) { + destroy_app_session(app, ua_sess); + } rcu_read_unlock(); return; } @@ -2319,7 +3232,7 @@ int ust_app_add_ctx_channel_glb(struct ltt_ust_session *usess, rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { if (!app->compatible) { /* * TODO: In time, we should notice the caller of this error by @@ -2332,76 +3245,21 @@ int ust_app_add_ctx_channel_glb(struct ltt_ust_session *usess, continue; } + pthread_mutex_lock(&ua_sess->lock); /* Lookup channel in the ust app session */ lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); ua_chan_node = lttng_ht_iter_get_node_str(&uiter); if (ua_chan_node == NULL) { - continue; + goto next_app; } ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); - ret = create_ust_app_channel_context(ua_sess, ua_chan, &uctx->ctx, app); if (ret < 0) { - continue; - } - } - - rcu_read_unlock(); - return ret; -} - -/* - * Add context to a specific event in a channel for global UST domain. - */ -int ust_app_add_ctx_event_glb(struct ltt_ust_session *usess, - struct ltt_ust_channel *uchan, struct ltt_ust_event *uevent, - struct ltt_ust_context *uctx) -{ - int ret = 0; - struct lttng_ht_node_str *ua_chan_node, *ua_event_node; - struct lttng_ht_iter iter, uiter; - struct ust_app_session *ua_sess; - struct ust_app_event *ua_event; - struct ust_app_channel *ua_chan = NULL; - struct ust_app *app; - - rcu_read_lock(); - - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { - if (!app->compatible) { - /* - * TODO: In time, we should notice the caller of this error by - * telling him that this is a version error. 
- */ - continue; - } - ua_sess = lookup_session_by_app(usess, app); - if (ua_sess == NULL) { - continue; - } - - /* Lookup channel in the ust app session */ - lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); - ua_chan_node = lttng_ht_iter_get_node_str(&uiter); - if (ua_chan_node == NULL) { - continue; - } - ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, - node); - - lttng_ht_lookup(ua_chan->events, (void *)uevent->attr.name, &uiter); - ua_event_node = lttng_ht_iter_get_node_str(&uiter); - if (ua_event_node == NULL) { - continue; - } - ua_event = caa_container_of(ua_event_node, struct ust_app_event, - node); - - ret = create_ust_app_event_context(ua_sess, ua_event, &uctx->ctx, app); - if (ret < 0) { - continue; + goto next_app; } + next_app: + pthread_mutex_unlock(&ua_sess->lock); } rcu_read_unlock(); @@ -2416,7 +3274,7 @@ int ust_app_enable_event_pid(struct ltt_ust_session *usess, { int ret = 0; struct lttng_ht_iter iter; - struct lttng_ht_node_str *ua_chan_node, *ua_event_node; + struct lttng_ht_node_str *ua_chan_node; struct ust_app *app; struct ust_app_session *ua_sess; struct ust_app_channel *ua_chan; @@ -2430,18 +3288,22 @@ int ust_app_enable_event_pid(struct ltt_ust_session *usess, if (app == NULL) { ERR("UST app enable event per PID %d not found", pid); ret = -1; - goto error; + goto end; } if (!app->compatible) { ret = 0; - goto error; + goto end; } ua_sess = lookup_session_by_app(usess, app); - /* If ua_sess is NULL, there is a code flow error */ - assert(ua_sess); + if (!ua_sess) { + /* The application has problem or is probably dead. 
*/ + ret = 0; + goto end; + } + pthread_mutex_lock(&ua_sess->lock); /* Lookup channel in the ust app session */ lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &iter); ua_chan_node = lttng_ht_iter_get_node_str(&iter); @@ -2450,23 +3312,23 @@ int ust_app_enable_event_pid(struct ltt_ust_session *usess, ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); - lttng_ht_lookup(ua_chan->events, (void *)uevent->attr.name, &iter); - ua_event_node = lttng_ht_iter_get_node_str(&iter); - if (ua_event_node == NULL) { + ua_event = find_ust_app_event(ua_chan->events, uevent->attr.name, + uevent->filter, uevent->attr.loglevel); + if (ua_event == NULL) { ret = create_ust_app_event(ua_sess, ua_chan, uevent, app); if (ret < 0) { - goto error; + goto end_unlock; } } else { - ua_event = caa_container_of(ua_event_node, struct ust_app_event, node); - ret = enable_ust_app_event(ua_sess, ua_event, app); if (ret < 0) { - goto error; + goto end_unlock; } } -error: +end_unlock: + pthread_mutex_unlock(&ua_sess->lock); +end: rcu_read_unlock(); return ret; } @@ -2502,8 +3364,10 @@ int ust_app_disable_event_pid(struct ltt_ust_session *usess, } ua_sess = lookup_session_by_app(usess, app); - /* If ua_sess is NULL, there is a code flow error */ - assert(ua_sess); + if (!ua_sess) { + /* The application has problem or is probably dead. */ + goto error; + } /* Lookup channel in the ust app session */ lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &iter); @@ -2532,45 +3396,6 @@ error: return ret; } -/* - * Validate version of UST apps and set the compatible bit. 
- */ -int ust_app_validate_version(int sock) -{ - int ret; - struct ust_app *app; - - rcu_read_lock(); - - app = find_app_by_sock(sock); - assert(app); - - ret = ustctl_tracer_version(sock, &app->version); - if (ret < 0) { - goto error; - } - - /* Validate version */ - if (app->version.major > UST_APP_MAJOR_VERSION) { - goto error; - } - - DBG2("UST app PID %d is compatible with major version %d " - "(supporting <= %d)", app->key.pid, app->version.major, - UST_APP_MAJOR_VERSION); - app->compatible = 1; - rcu_read_unlock(); - return 0; - -error: - DBG2("UST app PID %d is not compatible with major version %d " - "(supporting <= %d)", app->key.pid, app->version.major, - UST_APP_MAJOR_VERSION); - app->compatible = 0; - rcu_read_unlock(); - return -1; -} - /* * Calibrate registered applications. */ @@ -2582,7 +3407,7 @@ int ust_app_calibrate_glb(struct lttng_ust_calibrate *calibrate) rcu_read_lock(); - cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, node.node) { + cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) { if (!app->compatible) { /* * TODO: In time, we should notice the caller of this error by @@ -2591,7 +3416,9 @@ int ust_app_calibrate_glb(struct lttng_ust_calibrate *calibrate) continue; } - ret = ustctl_calibrate(app->key.sock, calibrate); + health_code_update(); + + ret = ustctl_calibrate(app->sock, calibrate); if (ret < 0) { switch (ret) { case -ENOSYS: @@ -2599,9 +3426,8 @@ int ust_app_calibrate_glb(struct lttng_ust_calibrate *calibrate) ret = 0; break; default: - /* TODO: Report error to user */ DBG2("Calibrate app PID %d returned with error %d", - app->key.pid, ret); + app->pid, ret); break; } } @@ -2611,5 +3437,413 @@ int ust_app_calibrate_glb(struct lttng_ust_calibrate *calibrate) rcu_read_unlock(); + health_code_update(); + + return ret; +} + +/* + * Receive registration and populate the given msg structure. + * + * On success return 0 else a negative value returned by the ustctl call. 
+ */ +int ust_app_recv_registration(int sock, struct ust_register_msg *msg) +{ + int ret; + uint32_t pid, ppid, uid, gid; + + assert(msg); + + ret = ustctl_recv_reg_msg(sock, &msg->type, &msg->major, &msg->minor, + &pid, &ppid, &uid, &gid, + &msg->bits_per_long, + &msg->uint8_t_alignment, + &msg->uint16_t_alignment, + &msg->uint32_t_alignment, + &msg->uint64_t_alignment, + &msg->long_alignment, + &msg->byte_order, + msg->name); + if (ret < 0) { + switch (-ret) { + case EPIPE: + case ECONNRESET: + case LTTNG_UST_ERR_EXITING: + DBG3("UST app recv reg message failed. Application died"); + break; + case LTTNG_UST_ERR_UNSUP_MAJOR: + ERR("UST app recv reg unsupported version %d.%d. Supporting %d.%d", + msg->major, msg->minor, LTTNG_UST_ABI_MAJOR_VERSION, + LTTNG_UST_ABI_MINOR_VERSION); + break; + default: + ERR("UST app recv reg message failed with ret %d", ret); + break; + } + goto error; + } + msg->pid = (pid_t) pid; + msg->ppid = (pid_t) ppid; + msg->uid = (uid_t) uid; + msg->gid = (gid_t) gid; + +error: + return ret; +} + +/* + * Return a ust app channel object using the application object and the channel + * object descriptor as a key. If not found, NULL is returned. A RCU read side + * lock MUST be acquired before calling this function. + */ +static struct ust_app_channel *find_channel_by_objd(struct ust_app *app, + int objd) +{ + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter iter; + struct ust_app_channel *ua_chan = NULL; + + assert(app); + + lttng_ht_lookup(app->ust_objd, (void *)((unsigned long) objd), &iter); + node = lttng_ht_iter_get_node_ulong(&iter); + if (node == NULL) { + DBG2("UST app channel find by objd %d not found", objd); + goto error; + } + + ua_chan = caa_container_of(node, struct ust_app_channel, ust_objd_node); + +error: + return ua_chan; +} + +/* + * Reply to a register channel notification from an application on the notify + * socket. The channel metadata is also created. 
+ * + * The session UST registry lock is acquired in this function. + * + * On success 0 is returned else a negative value. + */ +static int reply_ust_register_channel(int sock, int sobjd, int cobjd, + size_t nr_fields, struct ustctl_field *fields) +{ + int ret, ret_code = 0; + uint32_t chan_id, reg_count; + enum ustctl_channel_header type; + struct ust_app *app; + struct ust_app_channel *ua_chan; + struct ust_app_session *ua_sess; + + rcu_read_lock(); + + /* Lookup application. If not found, there is a code flow error. */ + app = find_app_by_notify_sock(sock); + if (!app) { + DBG("Application socket %d is being teardown. Abort event notify", + sock); + ret = 0; + goto error_rcu_unlock; + } + + /* Lookup channel by UST object descriptor. Should always be found. */ + ua_chan = find_channel_by_objd(app, cobjd); + assert(ua_chan); + assert(ua_chan->session); + ua_sess = ua_chan->session; + assert(ua_sess); + + pthread_mutex_lock(&ua_sess->registry.lock); + + if (ust_registry_is_max_id(ua_chan->session->registry.used_channel_id)) { + ret_code = -1; + chan_id = -1U; + type = -1; + goto reply; + } + + /* Don't assign ID to metadata. 
*/ + if (ua_chan->attr.type == LTTNG_UST_CHAN_METADATA) { + chan_id = -1U; + } else { + chan_id = ust_registry_get_next_chan_id(&ua_chan->session->registry); + } + + reg_count = ust_registry_get_event_count(&ua_chan->registry); + if (reg_count < 31) { + type = USTCTL_CHANNEL_HEADER_COMPACT; + } else { + type = USTCTL_CHANNEL_HEADER_LARGE; + } + + ua_chan->registry.nr_ctx_fields = nr_fields; + ua_chan->registry.ctx_fields = fields; + ua_chan->registry.chan_id = chan_id; + ua_chan->registry.header_type = type; + + /* Append to metadata */ + if (!ret_code) { + ret_code = ust_metadata_channel_statedump(&ua_chan->session->registry, + &ua_chan->registry); + if (ret_code) { + ERR("Error appending channel metadata (errno = %d)", ret_code); + goto reply; + } + } + +reply: + DBG3("UST app replying to register channel with id %u, type: %d, ret: %d", + chan_id, type, ret_code); + + ret = ustctl_reply_register_channel(sock, chan_id, type, ret_code); + if (ret < 0) { + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app reply channel failed with ret %d", ret); + } else { + DBG3("UST app reply channel failed. Application died"); + } + goto error; + } + +error: + pthread_mutex_unlock(&ua_sess->registry.lock); +error_rcu_unlock: + rcu_read_unlock(); + return ret; +} + +/* + * Add event to the UST channel registry. When the event is added to the + * registry, the metadata is also created. Once done, this replies to the + * application with the appropriate error code. + * + * The session UST registry lock is acquired in the function. + * + * On success 0 is returned else a negative value. + */ +static int add_event_ust_registry(int sock, int sobjd, int cobjd, char *name, + char *sig, size_t nr_fields, struct ustctl_field *fields, int loglevel, + char *model_emf_uri) +{ + int ret, ret_code; + uint32_t event_id = 0; + struct ust_app *app; + struct ust_app_channel *ua_chan; + struct ust_app_session *ua_sess; + + rcu_read_lock(); + + /* Lookup application. 
If not found, there is a code flow error. */ + app = find_app_by_notify_sock(sock); + if (!app) { + DBG("Application socket %d is being teardown. Abort event notify", + sock); + ret = 0; + goto error_rcu_unlock; + } + + /* Lookup channel by UST object descriptor. Should always be found. */ + ua_chan = find_channel_by_objd(app, cobjd); + assert(ua_chan); + assert(ua_chan->session); + ua_sess = ua_chan->session; + + pthread_mutex_lock(&ua_sess->registry.lock); + + ret_code = ust_registry_create_event(&ua_sess->registry, + &ua_chan->registry, sobjd, cobjd, name, sig, nr_fields, fields, + loglevel, model_emf_uri, &event_id); + + /* + * The return value is returned to ustctl so in case of an error, the + * application can be notified. In case of an error, it's important not to + * return a negative error or else the application will get closed. + */ + ret = ustctl_reply_register_event(sock, event_id, ret_code); + if (ret < 0) { + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app reply event failed with ret %d", ret); + } else { + DBG3("UST app reply event failed. Application died"); + } + /* + * No need to wipe the create event since the application socket will + * get close on error hence cleaning up everything by itself. + */ + goto error; + } + + DBG3("UST registry event %s has been added successfully", name); + +error: + pthread_mutex_unlock(&ua_sess->registry.lock); +error_rcu_unlock: + rcu_read_unlock(); + return ret; +} + +/* + * Handle application notification through the given notify socket. + * + * Return 0 on success or else a negative value. + */ +int ust_app_recv_notify(int sock) +{ + int ret; + enum ustctl_notify_cmd cmd; + + DBG3("UST app receiving notify from sock %d", sock); + + ret = ustctl_recv_notify(sock, &cmd); + if (ret < 0) { + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app recv notify failed with ret %d", ret); + } else { + DBG3("UST app recv notify failed. 
Application died"); + } + goto error; + } + + switch (cmd) { + case USTCTL_NOTIFY_CMD_EVENT: + { + int sobjd, cobjd, loglevel; + char name[LTTNG_UST_SYM_NAME_LEN], *sig, *model_emf_uri; + size_t nr_fields; + struct ustctl_field *fields; + + DBG2("UST app ustctl register event received"); + + ret = ustctl_recv_register_event(sock, &sobjd, &cobjd, name, &loglevel, + &sig, &nr_fields, &fields, &model_emf_uri); + if (ret < 0) { + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app recv event failed with ret %d", ret); + } else { + DBG3("UST app recv event failed. Application died"); + } + goto error; + } + + /* Add event to the UST registry coming from the notify socket. */ + ret = add_event_ust_registry(sock, sobjd, cobjd, name, sig, nr_fields, + fields, loglevel, model_emf_uri); + if (ret < 0) { + goto error; + } + + break; + } + case USTCTL_NOTIFY_CMD_CHANNEL: + { + int sobjd, cobjd; + size_t nr_fields; + struct ustctl_field *fields; + + DBG2("UST app ustctl register channel received"); + + ret = ustctl_recv_register_channel(sock, &sobjd, &cobjd, &nr_fields, + &fields); + if (ret < 0) { + if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { + ERR("UST app recv channel failed with ret %d", ret); + } else { + DBG3("UST app recv channel failed. Application died"); + } + goto error; + } + + ret = reply_ust_register_channel(sock, sobjd, cobjd, nr_fields, + fields); + if (ret < 0) { + goto error; + } + + break; + } + default: + /* Should NEVER happen. */ + assert(0); + } + +error: return ret; } + +/* + * Once the notify socket hangs up, this is called. First, it tries to find the + * corresponding application. On failure, the call_rcu to close the socket is + * executed. If an application is found, it tries to delete it from the notify + * socket hash table. Whathever the result, it proceeds to the call_rcu. + * + * Note that an object needs to be allocated here so on ENOMEM failure, the + * call RCU is not done but the rest of the cleanup is. 
+ */ +void ust_app_notify_sock_unregister(int sock) +{ + int err_enomem = 0; + struct lttng_ht_iter iter; + struct ust_app *app; + struct ust_app_notify_sock_obj *obj; + + assert(sock >= 0); + + rcu_read_lock(); + + obj = zmalloc(sizeof(*obj)); + if (!obj) { + /* + * An ENOMEM is kind of uncool. If this strikes we continue the + * procedure but the call_rcu will not be called. In this case, we + * accept the fd leak rather than possibly creating an unsynchronized + * state between threads. + * + * TODO: The notify object should be created once the notify socket is + * registered and stored independantely from the ust app object. The + * tricky part is to synchronize the teardown of the application and + * this notify object. Let's keep that in mind so we can avoid this + * kind of shenanigans with ENOMEM in the teardown path. + */ + err_enomem = 1; + } else { + obj->fd = sock; + } + + DBG("UST app notify socket unregister %d", sock); + + /* + * Lookup application by notify socket. If this fails, this means that the + * hash table delete has already been done by the application + * unregistration process so we can safely close the notify socket in a + * call RCU. + */ + app = find_app_by_notify_sock(sock); + if (!app) { + goto close_socket; + } + + iter.iter.node = &app->notify_sock_n.node; + + /* + * Whatever happens here either we fail or succeed, in both cases we have + * to close the socket after a grace period to continue to the call RCU + * here. If the deletion is successful, the application is not visible + * anymore by other threads and is it fails it means that it was already + * deleted from the hash table so either way we just have to close the + * socket. 
+ */ + (void) lttng_ht_del(ust_app_ht_by_notify_sock, &iter); + +close_socket: + rcu_read_unlock(); + + /* + * Close socket after a grace period to avoid for the socket to be reused + * before the application object is freed creating potential race between + * threads trying to add unique in the global hash table. + */ + if (!err_enomem) { + call_rcu(&obj->head, close_notify_sock_rcu); + } +}