/*
 * Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License, version 2 only, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
| 17 | |
| 18 | #include <assert.h> |
| 19 | #include <regex.h> |
| 20 | #include <stdio.h> |
| 21 | #include <stdlib.h> |
| 22 | #include <string.h> |
| 23 | |
| 24 | #include <common/common.h> |
| 25 | #include <common/defaults.h> |
| 26 | #include <common/utils.h> |
| 27 | |
| 28 | #include "backward-compatibility-group-by.h" |
| 29 | |
/*
 * Length of a "-<date>-<time>" suffix with the leading '-' included, i.e.
 * strlen("-YYYYMMDD-HHMMSS"). The datetime itself is one character shorter.
 */
#define DATETIME_STRING_SIZE 16

/*
 * Matches any string ending with a "-YYYYMMDD-HHMMSS" suffix. Only bracket
 * expressions, ".*" and "$" are used, so the pattern is valid when compiled
 * by regcomp() without REG_EXTENDED. The digit ranges are a loose sanity
 * check only (e.g. month "19" is accepted).
 */
#define DATETIME_REGEX \
	".*-[1-2][0-9][0-9][0-9][0-1][0-9][0-3][0-9]-[0-2][0-9][0-5][0-9][0-5][0-9]$"
| 33 | |
| 34 | /* |
| 35 | * Provide support for --group-output-by-session for producer >= 2.4 and < 2.11. |
| 36 | * Take the stream path, extract all available information, craft a new path to |
| 37 | * the best of our ability enforcing the group by session. |
| 38 | * |
| 39 | * Return the allocated string containing the new stream path or else NULL. |
| 40 | */ |
| 41 | char *backward_compat_group_by_session( |
| 42 | const char *path, const char *local_session_name) |
| 43 | { |
| 44 | int ret; |
| 45 | size_t len; |
| 46 | char *leftover_ptr; |
| 47 | char *local_copy = NULL; |
| 48 | char *datetime = NULL; |
| 49 | char *partial_base_path = NULL; |
| 50 | char *filepath_per_session = NULL; |
| 51 | const char *second_token_ptr; |
| 52 | const char *leftover_second_token_ptr; |
| 53 | const char *hostname_ptr; |
| 54 | regex_t regex; |
| 55 | |
| 56 | assert(path); |
| 57 | assert(local_session_name); |
| 58 | assert(local_session_name[0] != '\0'); |
| 59 | |
| 60 | DBG("Parsing path \"%s\" of session \"%s\" to create a new path that is grouped by session", |
| 61 | path, local_session_name); |
| 62 | |
| 63 | /* Get a local copy for strtok */ |
| 64 | local_copy = strdup(path); |
| 65 | if (!local_copy) { |
| 66 | PERROR("Failed to parse session path: couldn't copy input path"); |
| 67 | goto error; |
| 68 | } |
| 69 | |
| 70 | /* |
| 71 | * The use of strtok with '/' as delimiter is valid since we refuse '/' |
| 72 | * in session name and '/' is not a valid hostname character based on |
| 73 | * RFC-952 [1], RFC-921 [2] and refined in RFC-1123 [2]. |
| 74 | * [1] https://tools.ietf.org/html/rfc952 |
| 75 | * [2] https://tools.ietf.org/html/rfc921 |
| 76 | * [3] https://tools.ietf.org/html/rfc1123#page-13 |
| 77 | */ |
| 78 | |
| 79 | /* |
| 80 | * Get the hostname and possible session_name. |
| 81 | * Note that we can get the hostname and session name from the |
| 82 | * relay_session object we already have. Still, it is easier to |
| 83 | * tokenized the passed path to obtain the start of the path leftover. |
| 84 | */ |
| 85 | hostname_ptr = strtok_r(local_copy, "/", &leftover_ptr); |
| 86 | if (!hostname_ptr) { |
| 87 | ERR("Failed to parse session path \"%s\": couldn't identify hostname", |
| 88 | path); |
| 89 | goto error; |
| 90 | } |
| 91 | |
| 92 | second_token_ptr = strtok_r(NULL, "/", &leftover_ptr); |
| 93 | if (!second_token_ptr) { |
| 94 | ERR("Failed to parse session path \"%s\": couldn't identify session name", |
| 95 | path); |
| 96 | goto error; |
| 97 | } |
| 98 | |
| 99 | /* |
| 100 | * Check if the second token is a base path set at url level. This is |
| 101 | * legal in streaming, live and snapshot [1]. Otherwise it is the |
| 102 | * session name with possibly a datetime attached [2]. Note that when |
| 103 | * "adding" snapshot output (lttng snapshot add-output), no session name |
| 104 | * is present in the path by default. The handling for "base path" take |
| 105 | * care of this case as well. |
| 106 | * [1] e.g --set-url net://localhost/my_marvellous_path |
| 107 | * [2] Can be: |
| 108 | * <session_name> |
| 109 | * When using --snapshot on session create. |
| 110 | * <session_name>-<date>-<time> |
| 111 | * <auto>-<date>-<time> |
| 112 | */ |
| 113 | if (strncmp(second_token_ptr, local_session_name, |
| 114 | strlen(local_session_name)) != 0) { |
| 115 | /* |
| 116 | * Token does not start with session name. |
| 117 | * This mean this is an extra path scenario. |
| 118 | * Duplicate the current token since it is part of an |
| 119 | * base_path. |
| 120 | * Set secDuplicate the current token since it is part of an |
| 121 | * base_path. The rest is the leftover. |
| 122 | * Set second_token_ptr to the local_session_name for further |
| 123 | * processing. |
| 124 | */ |
| 125 | partial_base_path = strdup(second_token_ptr); |
| 126 | if (!partial_base_path) { |
| 127 | PERROR("Failed to parse session path: couldn't copy partial base path"); |
| 128 | goto error; |
| 129 | } |
| 130 | |
| 131 | second_token_ptr = local_session_name; |
| 132 | } |
| 133 | |
| 134 | /* |
| 135 | * Based on the previous test, we can move inside the token ptr to |
| 136 | * remove the "local_session_name" and inspect the rest of the token. |
| 137 | * We are looking into extracting the creation datetime from either the |
| 138 | * session_name or the token. We need to to all this gymnastic because |
| 139 | * an extra path could decide to append a datetime to its first |
| 140 | * subdirectory. |
| 141 | * Possible scenario: |
| 142 | * <session_name> |
| 143 | * <session_name>-<date>-<time> |
| 144 | * <auto>-<date>-<time> |
| 145 | * <session_name>_base_path_foo_bar |
| 146 | * <session_name>-<false date>-<false-time> (via a base path) |
| 147 | * |
| 148 | * We have no way to discern from the basic scenario of: |
| 149 | * <session_name>-<date>-<time> |
| 150 | * and one done using a base path with the exact format we normally |
| 151 | * expect. |
| 152 | * |
| 153 | * e.g: |
| 154 | * lttng create my_session -U |
| 155 | * net://localhost/my_session-19910319-120000/ |
| 156 | */ |
| 157 | ret = regcomp(®ex, DATETIME_REGEX, 0); |
| 158 | if (ret) { |
| 159 | ERR("Failed to parse session path: regex compilation failed with code %d", ret); |
| 160 | goto error; |
| 161 | } |
| 162 | |
| 163 | leftover_second_token_ptr = |
| 164 | second_token_ptr + strlen(local_session_name); |
| 165 | len = strlen(leftover_second_token_ptr); |
| 166 | if (len == 0) { |
| 167 | /* |
| 168 | * We are either dealing with an auto session name or only the |
| 169 | * session_name. If this is a auto session name, we need to |
| 170 | * fetch the creation datetime. |
| 171 | */ |
| 172 | ret = regexec(®ex, local_session_name, 0, NULL, 0); |
| 173 | if (ret == 0) { |
| 174 | const ssize_t local_session_name_offset = |
| 175 | strlen(local_session_name) - DATETIME_STRING_SIZE + 1; |
| 176 | |
| 177 | assert(local_session_name_offset >= 0); |
| 178 | datetime = strdup(local_session_name + |
| 179 | local_session_name_offset); |
| 180 | if (!datetime) { |
| 181 | PERROR("Failed to parse session path: couldn't copy datetime on regex match"); |
| 182 | goto error_regex; |
| 183 | } |
| 184 | } |
| 185 | } else if (len == DATETIME_STRING_SIZE && |
| 186 | !regexec(®ex, leftover_second_token_ptr, 0, NULL, |
| 187 | 0)) { |
| 188 | /* |
| 189 | * The leftover from the second token is of format |
| 190 | * "-<datetime>", use it as the creation time. |
| 191 | * Ignore leading "-". |
| 192 | */ |
| 193 | datetime = strdup(&leftover_second_token_ptr[1]); |
| 194 | if (!datetime) { |
| 195 | PERROR("Failed to parse session path: couldn't copy datetime on regex match"); |
| 196 | goto error_regex; |
| 197 | } |
| 198 | } else { |
| 199 | /* |
| 200 | * Base path scenario. |
| 201 | * We cannot try to extract the datetime from the session name |
| 202 | * since nothing prevent a user to name a session in the |
| 203 | * "name-<datetime>" format. Using the datetime from such a |
| 204 | * session would be invalid. |
| 205 | * */ |
| 206 | assert(partial_base_path == NULL); |
| 207 | assert(datetime == NULL); |
| 208 | |
| 209 | partial_base_path = strdup(second_token_ptr); |
| 210 | if (!partial_base_path) { |
| 211 | PERROR("Failed to parse session path: couldn't copy partial base path"); |
| 212 | goto error_regex; |
| 213 | } |
| 214 | } |
| 215 | |
| 216 | ret = asprintf(&filepath_per_session, "%s/%s%s%s/%s%s%s", |
| 217 | local_session_name, hostname_ptr, datetime ? "-" : "", |
| 218 | datetime ? datetime : "", |
| 219 | partial_base_path ? partial_base_path : "", |
| 220 | partial_base_path ? "/" : "", leftover_ptr); |
| 221 | if (ret < 0) { |
| 222 | filepath_per_session = NULL; |
| 223 | goto error; |
| 224 | } |
| 225 | error_regex: |
| 226 | regfree(®ex); |
| 227 | error: |
| 228 | free(local_copy); |
| 229 | free(partial_base_path); |
| 230 | free(datetime); |
| 231 | return filepath_per_session; |
| 232 | } |