X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fust-app.c;h=96ba2f4bcad0649afe1cfcec93e2f6c1f13002df;hp=c30792c3c785e959968dc9c00434c0e12af1ec73;hb=df5b86c84d896eb2d74a8757c234492c1d1fc3be;hpb=a9ad0c8fb50ac8cf9e9812dd9c9b4f949bac19a8 diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c index c30792c3c..96ba2f4bc 100644 --- a/src/bin/lttng-sessiond/ust-app.c +++ b/src/bin/lttng-sessiond/ust-app.c @@ -431,6 +431,9 @@ void delete_ust_app_channel(int sock, struct ust_app_channel *ua_chan, * Must be called with the registry lock held. * * On success, return the len of metadata pushed or else a negative value. + * Returning a -EPIPE return value means we could not send the metadata, + * but it can be caused by recoverable errors (e.g. the application has + * terminated concurrently). */ ssize_t ust_app_push_metadata(struct ust_registry_session *registry, struct consumer_socket *socket, int send_zero_data) @@ -454,9 +457,10 @@ ssize_t ust_app_push_metadata(struct ust_registry_session *registry, /* * On a push metadata error either the consumer is dead or the * metadata channel has been destroyed because its endpoint - * might have died (e.g: relayd). If so, the metadata closed - * flag is set to 1 so we deny pushing metadata again which is - * not valid anymore on the consumer side. + * might have died (e.g: relayd), or because the application has + * exited. If so, the metadata closed flag is set to 1 so we + * deny pushing metadata again which is not valid anymore on the + * consumer side. */ if (registry->metadata_closed) { return -EPIPE; @@ -547,6 +551,9 @@ error_push: * of socket throughout this function. * * Return 0 on success else a negative error. + * Returning a -EPIPE return value means we could not send the metadata, + * but it can be caused by recoverable errors (e.g. the application has + * terminated concurrently). */ static int push_metadata(struct ust_registry_session *registry, struct consumer_output *consumer) @@ -670,6 +677,9 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess, pthread_mutex_lock(&ua_sess->lock); + assert(!ua_sess->deleted); + ua_sess->deleted = true; + registry = get_session_registry(ua_sess); if (registry) { /* Push metadata for application before freeing the application. */ @@ -712,6 +722,8 @@ void delete_ust_app_session(int sock, struct ust_app_session *ua_sess, } pthread_mutex_unlock(&ua_sess->lock); + consumer_output_put(ua_sess->consumer); + call_rcu(&ua_sess->rcu_head, delete_ust_app_session_rcu); } @@ -973,15 +985,15 @@ error: * * Return allocated filter or NULL on error. */ -static struct lttng_ust_filter_bytecode *alloc_copy_ust_app_filter( - struct lttng_ust_filter_bytecode *orig_f) +static struct lttng_filter_bytecode *copy_filter_bytecode( + struct lttng_filter_bytecode *orig_f) { - struct lttng_ust_filter_bytecode *filter = NULL; + struct lttng_filter_bytecode *filter = NULL; /* Copy filter bytecode */ filter = zmalloc(sizeof(*filter) + orig_f->len); if (!filter) { - PERROR("zmalloc alloc ust app filter"); + PERROR("zmalloc alloc filter bytecode"); goto error; } @@ -991,6 +1003,30 @@ error: return filter; } +/* + * Create a liblttng-ust filter bytecode from given bytecode. + * + * Return allocated filter or NULL on error. + */ +static struct lttng_ust_filter_bytecode *create_ust_bytecode_from_bytecode( + struct lttng_filter_bytecode *orig_f) +{ + struct lttng_ust_filter_bytecode *filter = NULL; + + /* Copy filter bytecode */ + filter = zmalloc(sizeof(*filter) + orig_f->len); + if (!filter) { + PERROR("zmalloc alloc ust filter bytecode"); + goto error; + } + + assert(sizeof(struct lttng_filter_bytecode) == + sizeof(struct lttng_ust_filter_bytecode)); + memcpy(filter, orig_f, sizeof(*filter) + orig_f->len); +error: + return filter; +} + /* * Find an ust_app using the sock and return it. RCU read side lock must be * held before calling this helper function. @@ -1043,7 +1079,7 @@ error: * Return an ust_app_event object or NULL on error. */ static struct ust_app_event *find_ust_app_event(struct lttng_ht *ht, - char *name, struct lttng_ust_filter_bytecode *filter, int loglevel, + char *name, struct lttng_filter_bytecode *filter, int loglevel, const struct lttng_event_exclusion *exclusion) { struct lttng_ht_iter iter; @@ -1059,7 +1095,7 @@ static struct ust_app_event *find_ust_app_event(struct lttng_ht *ht, key.filter = filter; key.loglevel = loglevel; /* lttng_event_exclusion and lttng_ust_event_exclusion structures are similar */ - key.exclusion = (struct lttng_ust_event_exclusion *)exclusion; + key.exclusion = exclusion; /* Lookup using the event name as hash and a custom match fct. */ cds_lfht_lookup(ht->ht, ht->hash_fct((void *) name, lttng_ht_seed), @@ -1124,6 +1160,7 @@ int set_ust_event_filter(struct ust_app_event *ua_event, struct ust_app *app) { int ret; + struct lttng_ust_filter_bytecode *ust_bytecode = NULL; health_code_update(); @@ -1132,7 +1169,12 @@ int set_ust_event_filter(struct ust_app_event *ua_event, goto error; } - ret = ustctl_set_filter(app->sock, ua_event->filter, + ust_bytecode = create_ust_bytecode_from_bytecode(ua_event->filter); + if (!ust_bytecode) { + ret = -LTTNG_ERR_NOMEM; + goto error; + } + ret = ustctl_set_filter(app->sock, ust_bytecode, ua_event->obj); if (ret < 0) { if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { @@ -1154,9 +1196,31 @@ int set_ust_event_filter(struct ust_app_event *ua_event, error: health_code_update(); + free(ust_bytecode); return ret; } +static +struct lttng_ust_event_exclusion *create_ust_exclusion_from_exclusion( + struct lttng_event_exclusion *exclusion) +{ + struct lttng_ust_event_exclusion *ust_exclusion = NULL; + size_t exclusion_alloc_size = sizeof(struct lttng_ust_event_exclusion) + + LTTNG_UST_SYM_NAME_LEN * exclusion->count; + + ust_exclusion = zmalloc(exclusion_alloc_size); + if (!ust_exclusion) { + PERROR("malloc"); + goto end; + } + + assert(sizeof(struct lttng_event_exclusion) == + sizeof(struct lttng_ust_event_exclusion)); + memcpy(ust_exclusion, exclusion, exclusion_alloc_size); +end: + return ust_exclusion; +} + /* * Set event exclusions on the tracer. */ @@ -1165,6 +1229,7 @@ int set_ust_event_exclusion(struct ust_app_event *ua_event, struct ust_app *app) { int ret; + struct lttng_ust_event_exclusion *ust_exclusion = NULL; health_code_update(); @@ -1173,8 +1238,13 @@ int set_ust_event_exclusion(struct ust_app_event *ua_event, goto error; } - ret = ustctl_set_exclusion(app->sock, ua_event->exclusion, - ua_event->obj); + ust_exclusion = create_ust_exclusion_from_exclusion( + ua_event->exclusion); + if (!ust_exclusion) { + ret = -LTTNG_ERR_NOMEM; + goto error; + } + ret = ustctl_set_exclusion(app->sock, ust_exclusion, ua_event->obj); if (ret < 0) { if (ret != -EPIPE && ret != -LTTNG_UST_ERR_EXITING) { ERR("UST app event %s exclusions failed for app (pid: %d) " @@ -1195,6 +1265,7 @@ int set_ust_event_exclusion(struct ust_app_event *ua_event, error: health_code_update(); + free(ust_exclusion); return ret; } @@ -1366,7 +1437,10 @@ static int send_channel_pid_to_ust(struct ust_app *app, /* Send channel to the application. */ ret = ust_consumer_send_channel_to_ust(app, ua_sess, ua_chan); - if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + ret = -ENOTCONN; /* Caused by app exiting. */ + goto error; + } else if (ret < 0) { goto error; } @@ -1375,7 +1449,10 @@ static int send_channel_pid_to_ust(struct ust_app *app, /* Send all streams to application. */ cds_list_for_each_entry_safe(stream, stmp, &ua_chan->streams.head, list) { ret = ust_consumer_send_stream_to_ust(app, ua_chan, stream); - if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + ret = -ENOTCONN; /* Caused by app exiting. */ + goto error; + } else if (ret < 0) { goto error; } /* We don't need the stream anymore once sent to the tracer. */ @@ -1496,13 +1573,13 @@ static void shadow_copy_event(struct ust_app_event *ua_event, /* Copy filter bytecode */ if (uevent->filter) { - ua_event->filter = alloc_copy_ust_app_filter(uevent->filter); + ua_event->filter = copy_filter_bytecode(uevent->filter); /* Filter might be NULL here in case of ENONEM. */ } /* Copy exclusion data */ if (uevent->exclusion) { - exclusion_alloc_size = sizeof(struct lttng_ust_event_exclusion) + + exclusion_alloc_size = sizeof(struct lttng_event_exclusion) + LTTNG_UST_SYM_NAME_LEN * uevent->exclusion->count; ua_event->exclusion = zmalloc(exclusion_alloc_size); if (ua_event->exclusion == NULL) { @@ -1610,8 +1687,11 @@ static void shadow_copy_session(struct ust_app_session *ua_sess, ua_sess->egid = usess->gid; ua_sess->buffer_type = usess->buffer_type; ua_sess->bits_per_long = app->bits_per_long; + /* There is only one consumer object per session possible. */ + consumer_output_get(usess->consumer); ua_sess->consumer = usess->consumer; + ua_sess->output_traces = usess->output_traces; ua_sess->live_timer_interval = usess->live_timer_interval; copy_channel_attr_to_ustctl(&ua_sess->metadata_attr, @@ -1698,9 +1778,10 @@ static void shadow_copy_session(struct ust_app_session *ua_sess, lttng_ht_add_unique_str(ua_sess->channels, &ua_chan->node); } + return; error: - return; + consumer_output_put(ua_sess->consumer); } /* @@ -2507,7 +2588,10 @@ static int send_channel_uid_to_ust(struct buffer_reg_channel *reg_chan, /* Send channel to the application. */ ret = ust_consumer_send_channel_to_ust(app, ua_sess, ua_chan); - if (ret < 0) { + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + ret = -ENOTCONN; /* Caused by app exiting. */ + goto error; + } else if (ret < 0) { goto error; } @@ -2526,6 +2610,12 @@ static int send_channel_uid_to_ust(struct buffer_reg_channel *reg_chan, ret = ust_consumer_send_stream_to_ust(app, ua_chan, &stream); if (ret < 0) { (void) release_ust_app_stream(-1, &stream); + if (ret == -EPIPE || ret == -LTTNG_UST_ERR_EXITING) { + ret = -ENOTCONN; /* Caused by app exiting. */ + goto error_stream_unlock; + } else if (ret < 0) { + goto error_stream_unlock; + } goto error_stream_unlock; } @@ -2619,10 +2709,9 @@ static int create_channel_per_uid(struct ust_app *app, /* Send buffers to the application. */ ret = send_channel_uid_to_ust(reg_chan, app, ua_sess, ua_chan); if (ret < 0) { - /* - * Don't report error to the console, since it may be - * caused by application concurrently exiting. - */ + if (ret != -ENOTCONN) { + ERR("Error sending channel to application"); + } goto error; } @@ -2673,10 +2762,9 @@ static int create_channel_per_pid(struct ust_app *app, ret = send_channel_pid_to_ust(app, ua_sess, ua_chan); if (ret < 0) { - /* - * Don't report error to the console, since it may be - * caused by application concurrently exiting. - */ + if (ret != -ENOTCONN) { + ERR("Error sending channel to application"); + } goto error; } @@ -2690,7 +2778,8 @@ error: * need and send it to the application. This MUST be called with a RCU read * side lock acquired. * - * Return 0 on success or else a negative value. + * Return 0 on success or else a negative value. Returns -ENOTCONN if + * the application exited concurrently. */ static int do_create_channel(struct ust_app *app, struct ltt_ust_session *usess, struct ust_app_session *ua_sess, @@ -2749,7 +2838,8 @@ error: * * Called with UST app session lock and RCU read-side lock held. * - * Return 0 on success or else a negative value. + * Return 0 on success or else a negative value. Returns -ENOTCONN if + * the application exited concurrently. */ static int create_ust_app_channel(struct ust_app_session *ua_sess, struct ltt_ust_channel *uchan, struct ust_app *app, @@ -3162,6 +3252,11 @@ void ust_app_unregister(int sock) */ pthread_mutex_lock(&ua_sess->lock); + if (ua_sess->deleted) { + pthread_mutex_unlock(&ua_sess->lock); + continue; + } + /* * Normally, this is done in the delete session process which is * executed in the call rcu below. However, upon registration we can't @@ -3745,6 +3840,7 @@ int ust_app_create_channel_glb(struct ltt_ust_session *usess, * or a timeout on it. We can't inform the caller that for a * specific app, the session failed so lets continue here. */ + ret = 0; /* Not an error. */ continue; case -ENOMEM: default: @@ -3754,6 +3850,12 @@ int ust_app_create_channel_glb(struct ltt_ust_session *usess, assert(ua_sess); pthread_mutex_lock(&ua_sess->lock); + + if (ua_sess->deleted) { + pthread_mutex_unlock(&ua_sess->lock); + continue; + } + if (!strncmp(uchan->name, DEFAULT_METADATA_NAME, sizeof(uchan->name))) { copy_channel_attr_to_ustctl(&ua_sess->metadata_attr, &uchan->attr); @@ -3765,14 +3867,23 @@ int ust_app_create_channel_glb(struct ltt_ust_session *usess, } pthread_mutex_unlock(&ua_sess->lock); if (ret < 0) { - if (ret == -ENOMEM) { - /* No more memory is a fatal error. Stop right now. */ - goto error_rcu_unlock; - } /* Cleanup the created session if it's the case. */ if (created) { destroy_app_session(app, ua_sess); } + switch (ret) { + case -ENOTCONN: + /* + * The application's socket is not valid. Either a bad socket + * or a timeout on it. We can't inform the caller that for a + * specific app, the session failed so lets continue here. + */ + ret = 0; /* Not an error. */ + continue; + case -ENOMEM: + default: + goto error_rcu_unlock; + } } } @@ -3823,11 +3934,23 @@ int ust_app_enable_event_glb(struct ltt_ust_session *usess, pthread_mutex_lock(&ua_sess->lock); + if (ua_sess->deleted) { + pthread_mutex_unlock(&ua_sess->lock); + continue; + } + /* Lookup channel in the ust app session */ lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); ua_chan_node = lttng_ht_iter_get_node_str(&uiter); - /* If the channel is not found, there is a code flow error */ - assert(ua_chan_node); + /* + * It is possible that the channel cannot be found is + * the channel/event creation occurs concurrently with + * an application exit. + */ + if (!ua_chan_node) { + pthread_mutex_unlock(&ua_sess->lock); + continue; + } ua_chan = caa_container_of(ua_chan_node, struct ust_app_channel, node); @@ -3889,6 +4012,12 @@ int ust_app_create_event_glb(struct ltt_ust_session *usess, } pthread_mutex_lock(&ua_sess->lock); + + if (ua_sess->deleted) { + pthread_mutex_unlock(&ua_sess->lock); + continue; + } + /* Lookup channel in the ust app session */ lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); ua_chan_node = lttng_ht_iter_get_node_str(&uiter); @@ -3940,6 +4069,11 @@ int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app) pthread_mutex_lock(&ua_sess->lock); + if (ua_sess->deleted) { + pthread_mutex_unlock(&ua_sess->lock); + goto end; + } + /* Upon restart, we skip the setup, already done */ if (ua_sess->started) { goto skip_setup; @@ -3951,7 +4085,7 @@ int ust_app_start_trace(struct ltt_ust_session *usess, struct ust_app *app) ret = run_as_mkdir_recursive(usess->consumer->dst.trace_path, S_IRWXU | S_IRWXG, ua_sess->euid, ua_sess->egid); if (ret < 0) { - if (ret != -EEXIST) { + if (errno != EEXIST) { ERR("Trace directory creation error"); goto error_unlock; } @@ -4040,6 +4174,11 @@ int ust_app_stop_trace(struct ltt_ust_session *usess, struct ust_app *app) pthread_mutex_lock(&ua_sess->lock); + if (ua_sess->deleted) { + pthread_mutex_unlock(&ua_sess->lock); + goto end_no_session; + } + /* * If started = 0, it means that stop trace has been called for a session * that was never started. It's possible since we can have a fail start @@ -4120,6 +4259,10 @@ int ust_app_flush_app_session(struct ust_app *app, pthread_mutex_lock(&ua_sess->lock); + if (ua_sess->deleted) { + goto end_deleted; + } + health_code_update(); /* Flushing buffers */ @@ -4149,6 +4292,7 @@ int ust_app_flush_app_session(struct ust_app *app, health_code_update(); +end_deleted: pthread_mutex_unlock(&ua_sess->lock); end_not_compatible: @@ -4382,6 +4526,11 @@ void ust_app_global_create(struct ltt_ust_session *usess, struct ust_app *app) pthread_mutex_lock(&ua_sess->lock); + if (ua_sess->deleted) { + pthread_mutex_unlock(&ua_sess->lock); + goto end; + } + /* * We can iterate safely here over all UST app session since the create ust * app session above made a shadow copy of the UST global domain from the @@ -4390,11 +4539,14 @@ void ust_app_global_create(struct ltt_ust_session *usess, struct ust_app *app) cds_lfht_for_each_entry(ua_sess->channels->ht, &iter.iter, ua_chan, node.node) { ret = do_create_channel(app, usess, ua_sess, ua_chan); - if (ret < 0) { + if (ret < 0 && ret != -ENOTCONN) { /* - * Stop everything. On error, the application failed, no more - * file descriptor are available or ENOMEM so stopping here is - * the only thing we can do for now. + * Stop everything. On error, the application + * failed, no more file descriptor are available + * or ENOMEM so stopping here is the only thing + * we can do for now. The only exception is + * -ENOTCONN, which indicates that the application + * has exit. */ goto error_unlock; } @@ -4524,6 +4676,12 @@ int ust_app_add_ctx_channel_glb(struct ltt_ust_session *usess, } pthread_mutex_lock(&ua_sess->lock); + + if (ua_sess->deleted) { + pthread_mutex_unlock(&ua_sess->lock); + continue; + } + /* Lookup channel in the ust app session */ lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &uiter); ua_chan_node = lttng_ht_iter_get_node_str(&uiter); @@ -4582,6 +4740,12 @@ int ust_app_enable_event_pid(struct ltt_ust_session *usess, } pthread_mutex_lock(&ua_sess->lock); + + if (ua_sess->deleted) { + ret = 0; + goto end_unlock; + } + /* Lookup channel in the ust app session */ lttng_ht_lookup(ua_sess->channels, (void *)uchan->name, &iter); ua_chan_node = lttng_ht_iter_get_node_str(&iter);