5 * LTTng filter expression parser
7 * Copyright 2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
9 * SPDX-License-Identifier: LGPL-2.1-only
11 * Grammar inspired from http://www.quut.com/c/ANSI-C-grammar-y.html
19 #include "common/bytecode/bytecode.h"
20 #include "filter-ast.h"
21 #include "filter-parser.h"
22 #include "memstream.h"
24 #include <common/compat/errno.h>
25 #include <common/macros.h>
27 #define WIDTH_u64_SCANF_IS_A_BROKEN_API "20"
28 #define WIDTH_o64_SCANF_IS_A_BROKEN_API "22"
29 #define WIDTH_x64_SCANF_IS_A_BROKEN_API "17"
30 #define WIDTH_lg_SCANF_IS_A_BROKEN_API "4096" /* Hugely optimistic approximation */
/*
 * Debug output configuration. Two alternative definitions of print_xml and
 * dbg_printf() follow; the preprocessor conditional that selects between
 * them is not visible in this excerpt (presumably a debug build switch --
 * TODO confirm).
 *
 * Variant 1: print_xml is 1 and dbg_printf() forwards to printf() with a
 * "[debug filter_parser] " prefix.
 */
33 static const int print_xml = 1;
34 #define dbg_printf(fmt, args...) \
35 printf("[debug filter_parser] " fmt, ## args)
/*
 * Variant 2: print_xml is 0. Per the inline comment below, dbg_printf() is
 * meant to do nothing at run time while still letting the compiler check
 * the printf format string against its arguments.
 */
37 static const int print_xml = 0;
38 #define dbg_printf(fmt, args...) \
40 /* do nothing but check printf format */ \
42 printf("[debug filter_parser] " fmt, ## args); \
47 int filter_parser_debug = 0;
49 int yyparse(struct filter_parser_ctx *parser_ctx, yyscan_t scanner);
50 int yylex(union YYSTYPE *yyval, yyscan_t scanner);
51 int yylex_init_extra(struct filter_parser_ctx *parser_ctx, yyscan_t * ptr_yy_globals);
52 int yylex_destroy(yyscan_t yyparser_ctx);
53 void yyrestart(FILE * in_str, yyscan_t parser_ctx);
56 struct cds_list_head gc;
/*
 * Printable names of the filter node types, indexed by node type value
 * through designated initializers. node_type() reads this table.
 */
61 static const char *node_type_to_str[] = {
62 [ NODE_UNKNOWN ] = "NODE_UNKNOWN",
63 [ NODE_ROOT ] = "NODE_ROOT",
64 [ NODE_EXPRESSION ] = "NODE_EXPRESSION",
65 [ NODE_OP ] = "NODE_OP",
66 [ NODE_UNARY_OP ] = "NODE_UNARY_OP",
/*
 * Return the printable name of a node's type.
 *
 * When node->type is below NR_NODE_TYPES the name is read from
 * node_type_to_str[]. The value returned for out-of-range types is not
 * visible in this excerpt.
 */
69 const char *node_type(struct filter_node *node)
71 if (node->type < NR_NODE_TYPES)
72 return node_type_to_str[node->type];
/*
 * Allocate a garbage-collected string buffer owned by the parser context.
 *
 * The allocation size starts at 8 and is grown (the growth step is not
 * visible in this excerpt; presumably doubling -- TODO confirm) until it
 * covers the gc_string header, `len` payload bytes and an extra
 * sizeof(long) of slack. The buffer is obtained from zmalloc(), linked onto
 * parser_ctx->allocated_strings and its allocated size is recorded in
 * gstr->alloclen.
 */
77 static struct gc_string *gc_string_alloc(struct filter_parser_ctx *parser_ctx,
80 struct gc_string *gstr;
83 /* TODO: could be faster with find first bit or glib Gstring */
84 /* sizeof long to account for malloc header (int or long ?) */
85 for (alloclen = 8; alloclen < sizeof(long) + sizeof(*gstr) + len;
88 gstr = zmalloc(alloclen);
/* Track the buffer on the context; filter_parser_ctx_free() passes this list to free_strings(). */
92 cds_list_add(&gstr->gc, &parser_ctx->allocated_strings);
93 gstr->alloclen = alloclen;
99 * note: never use gc_string_append on a string that has external references.
100 * gsrc will be garbage collected immediately, and gstr might be.
101 * Should only be used to append characters to a string literal or constant.
/*
 * Append the contents of gsrc to gstr; the grammar uses the returned
 * struct gc_string pointer as the new sequence value.
 *
 * newlen is the combined length of both strings plus the terminating NUL.
 * The same size-class loop as gc_string_alloc() computes the allocation
 * needed for newlen. When that exceeds gstr->alloclen, a new buffer is
 * obtained from gc_string_alloc(), filled with gstr followed by gsrc, and
 * gstr is unlinked from its gc list. The later strcat() into gstr is
 * presumably the in-place path taken when the existing buffer is large
 * enough (the else keyword is not visible here -- TODO confirm). gsrc is
 * unlinked from its gc list at the end.
 */
104 struct gc_string *gc_string_append(struct filter_parser_ctx *parser_ctx,
105 struct gc_string *gstr,
106 struct gc_string *gsrc)
108 size_t newlen = strlen(gsrc->s) + strlen(gstr->s) + 1;
111 /* TODO: could be faster with find first bit or glib Gstring */
112 /* sizeof long to account for malloc header (int or long ?) */
113 for (alloclen = 8; alloclen < sizeof(long) + sizeof(*gstr) + newlen;
116 if (alloclen > gstr->alloclen) {
117 struct gc_string *newgstr;
119 newgstr = gc_string_alloc(parser_ctx, newlen);
/*
 * NOTE(review): newgstr is dereferenced right away; if gc_string_alloc()
 * can return NULL on allocation failure this would crash -- confirm.
 */
120 strcpy(newgstr->s, gstr->s);
121 strcat(newgstr->s, gsrc->s);
122 cds_list_del(&gstr->gc);
126 strcat(gstr->s, gsrc->s);
128 cds_list_del(&gsrc->gc);
/*
 * Store a garbage-collected copy of `src` in lvalp->gs.
 *
 * The buffer is requested with strlen(src) + 1 bytes so that the
 * terminating NUL fits, then `src` is copied into it.
 *
 * NOTE(review): the result of gc_string_alloc() is dereferenced without a
 * NULL check -- confirm that it cannot fail here.
 */
133 void setstring(struct filter_parser_ctx *parser_ctx, YYSTYPE *lvalp, const char *src)
135 lvalp->gs = gc_string_alloc(parser_ctx, strlen(src) + 1);
136 strcpy(lvalp->gs->s, src);
/*
 * Allocate a filter AST node and register it with the AST of the given
 * parser context.
 *
 * The visible fragments show the node being allocated with zmalloc(),
 * cleared, and linked onto ast->allocated_nodes, followed by per-type
 * handling: an attempt to create a root node and an unknown node type are
 * both reported on stderr. The remaining parameters and the branching
 * statement itself are not visible in this excerpt.
 */
139 static struct filter_node *make_node(struct filter_parser_ctx *scanner,
142 struct filter_ast *ast = filter_parser_get_ast(scanner);
143 struct filter_node *node;
145 node = zmalloc(sizeof(*node));
/* NOTE(review): zmalloc() presumably returns zeroed memory, which would make this memset redundant -- confirm. */
148 memset(node, 0, sizeof(*node));
150 cds_list_add(&node->gc, &ast->allocated_nodes);
154 fprintf(stderr, "[error] %s: trying to create root node\n", __func__);
157 case NODE_EXPRESSION:
166 fprintf(stderr, "[error] %s: unknown node type %d\n", __func__,
/*
 * Build a NODE_OP node for a binary operator.
 *
 * The node is allocated with zmalloc(), cleared, tagged NODE_OP, linked
 * onto ast->allocated_nodes, and filled with the operator type and its
 * left and right children. The grammar actions call this with the
 * AST_OP_* operator constants.
 */
174 static struct filter_node *make_op_node(struct filter_parser_ctx *scanner,
176 struct filter_node *lchild,
177 struct filter_node *rchild)
179 struct filter_ast *ast = filter_parser_get_ast(scanner);
180 struct filter_node *node;
182 node = zmalloc(sizeof(*node));
185 memset(node, 0, sizeof(*node));
186 node->type = NODE_OP;
187 cds_list_add(&node->gc, &ast->allocated_nodes);
188 node->u.op.type = type;
189 node->u.op.lchild = lchild;
190 node->u.op.rchild = rchild;
/*
 * Error reporting hook: prints "error <str>" to stderr. The parse_error()
 * macro routes its messages through this function. parser_ctx and scanner
 * are not used by the visible statement.
 */
195 void yyerror(struct filter_parser_ctx *parser_ctx, yyscan_t scanner, const char *str)
197 fprintf(stderr, "error %s\n", str);
/*
 * Report a parse error through yyerror(), using the scanner stored in
 * parser_ctx. `str` is pasted between two string literals, so it must
 * itself be a string literal; the message becomes "parse error: <str>\n"
 * wrapped in YY_(). The rest of the macro body is not visible in this
 * excerpt.
 */
200 #define parse_error(parser_ctx, str) \
202 yyerror(parser_ctx, parser_ctx->scanner, YY_("parse error: " str "\n")); \
/*
 * Walk `list`, whose entries are struct gc_string linked through their gc
 * member, using the removal-safe list iterator. The loop body is not
 * visible in this excerpt (presumably it frees each entry -- TODO confirm).
 */
206 static void free_strings(struct cds_list_head *list)
208 struct gc_string *gstr, *tmp;
210 cds_list_for_each_entry_safe(gstr, tmp, list, gc)
/*
 * Allocate a filter AST: the structure is obtained from zmalloc() and
 * cleared, its allocated_nodes list is initialized empty, and the embedded
 * root node is typed NODE_ROOT.
 */
214 static struct filter_ast *filter_ast_alloc(void)
216 struct filter_ast *ast;
218 ast = zmalloc(sizeof(*ast));
221 memset(ast, 0, sizeof(*ast));
222 CDS_INIT_LIST_HEAD(&ast->allocated_nodes);
223 ast->root.type = NODE_ROOT;
/*
 * Walk every node linked on ast->allocated_nodes with the removal-safe
 * list iterator. The loop body and any release of `ast` itself are not
 * visible in this excerpt.
 */
227 static void filter_ast_free(struct filter_ast *ast)
229 struct filter_node *node, *tmp;
231 cds_list_for_each_entry_safe(node, tmp, &ast->allocated_nodes, gc)
/*
 * Run the generated parser (yyparse) on the context's scanner and return
 * its result unchanged.
 */
236 int filter_parser_ctx_append_ast(struct filter_parser_ctx *parser_ctx)
238 return yyparse(parser_ctx, parser_ctx->scanner);
/*
 * Create a parser context that reads the filter expression from `input`.
 *
 * Visible steps: yydebug is set from filter_parser_debug; the context is
 * allocated with zmalloc() and cleared; the scanner is initialized with the
 * context as its extra data and restarted on `input`; an AST is allocated;
 * the allocated_strings list is initialized. A message describing whether
 * `input` is a tty is printed to stdout (the guarding condition is not
 * visible here). The visible cleanup path destroys the scanner and logs a
 * failure to do so.
 */
241 struct filter_parser_ctx *filter_parser_ctx_alloc(FILE *input)
243 struct filter_parser_ctx *parser_ctx;
246 yydebug = filter_parser_debug;
248 parser_ctx = zmalloc(sizeof(*parser_ctx));
251 memset(parser_ctx, 0, sizeof(*parser_ctx));
253 ret = yylex_init_extra(parser_ctx, &parser_ctx->scanner);
255 fprintf(stderr, "yylex_init error\n");
256 goto cleanup_parser_ctx;
258 /* Start processing new stream */
259 yyrestart(input, parser_ctx->scanner);
261 parser_ctx->ast = filter_ast_alloc();
262 if (!parser_ctx->ast)
264 CDS_INIT_LIST_HEAD(&parser_ctx->allocated_strings);
267 fprintf(stdout, "parser_ctx input is a%s.\n",
268 isatty(fileno(input)) ? "n interactive tty" :
269 " noninteractive file");
274 ret = yylex_destroy(parser_ctx->scanner);
276 fprintf(stderr, "yylex_destroy error\n");
/*
 * Release a parser context: the scanner is destroyed (a failure is logged
 * to stderr), then the AST, the garbage-collected strings, the IR, the
 * bytecode and the bytecode relocation data are released. Freeing of the
 * context structure itself is not visible in this excerpt.
 */
282 void filter_parser_ctx_free(struct filter_parser_ctx *parser_ctx)
286 ret = yylex_destroy(parser_ctx->scanner);
288 fprintf(stderr, "yylex_destroy error\n");
290 filter_ast_free(parser_ctx->ast);
291 free_strings(&parser_ctx->allocated_strings);
292 filter_ir_free(parser_ctx);
293 free(parser_ctx->bytecode);
294 free(parser_ctx->bytecode_reloc);
/*
 * Build a parser context from a filter expression string and run the whole
 * compilation pipeline on it.
 *
 * Visible stages, in order: open the string as a read-only memory stream,
 * allocate the parser context, parse into the AST, print the AST as XML,
 * generate the IR, check binary operator nesting, normalize globbing
 * patterns, validate string literals, validate globbing patterns, generate
 * bytecode, and close the memory stream.
 *
 * Each visible failure sets `ret` to a negated LTTNG_ERR_FILTER_* code:
 * NOMEM for the stream/context allocation failures, INVAL for the others.
 * The visible error path frees the context and closes the stream. How
 * `ctxp` receives the context on success is not visible in this excerpt
 * (presumably *ctxp = ctx -- TODO confirm).
 */
299 int filter_parser_ctx_create_from_filter_expression(
300 const char *filter_expression, struct filter_parser_ctx **ctxp)
303 struct filter_parser_ctx *ctx = NULL;
306 LTTNG_ASSERT(filter_expression);
310 * Casting const to non-const, as the underlying function will use it in
313 fmem = lttng_fmemopen((void *) filter_expression,
314 strlen(filter_expression), "r");
316 fprintf(stderr, "Error opening memory as stream\n");
317 ret = -LTTNG_ERR_FILTER_NOMEM;
320 ctx = filter_parser_ctx_alloc(fmem);
322 fprintf(stderr, "Error allocating parser\n");
323 ret = -LTTNG_ERR_FILTER_NOMEM;
324 goto filter_alloc_error;
326 ret = filter_parser_ctx_append_ast(ctx);
328 fprintf(stderr, "Parse error\n");
329 ret = -LTTNG_ERR_FILTER_INVAL;
/* The condition guarding this XML dump is not visible here (presumably print_xml -- TODO confirm). */
333 ret = filter_visitor_print_xml(ctx, stdout, 0);
336 fprintf(stderr, "XML print error\n");
337 ret = -LTTNG_ERR_FILTER_INVAL;
342 dbg_printf("Generating IR... ");
344 ret = filter_visitor_ir_generate(ctx);
346 fprintf(stderr, "Generate IR error\n");
347 ret = -LTTNG_ERR_FILTER_INVAL;
350 dbg_printf("done\n");
352 dbg_printf("Validating IR... ");
354 ret = filter_visitor_ir_check_binary_op_nesting(ctx);
356 ret = -LTTNG_ERR_FILTER_INVAL;
360 /* Normalize globbing patterns in the expression. */
361 ret = filter_visitor_ir_normalize_glob_patterns(ctx);
363 ret = -LTTNG_ERR_FILTER_INVAL;
367 /* Validate strings used as literals in the expression. */
368 ret = filter_visitor_ir_validate_string(ctx);
370 ret = -LTTNG_ERR_FILTER_INVAL;
374 /* Validate globbing patterns in the expression. */
375 ret = filter_visitor_ir_validate_globbing(ctx);
377 ret = -LTTNG_ERR_FILTER_INVAL;
381 dbg_printf("done\n");
383 dbg_printf("Generating bytecode... ");
385 ret = filter_visitor_bytecode_generate(ctx);
387 fprintf(stderr, "Generate bytecode error\n");
388 ret = -LTTNG_ERR_FILTER_INVAL;
391 dbg_printf("done\n");
392 dbg_printf("Size of bytecode generated: %u bytes.\n",
393 bytecode_get_len(&ctx->bytecode->b));
395 /* No need to keep the memory stream. */
396 if (fclose(fmem) != 0) {
397 fprintf(stderr, "fclose (%d) \n", errno);
398 ret = -LTTNG_ERR_FILTER_INVAL;
/* Error path: release the context, then close the memory stream. */
406 filter_parser_ctx_free(ctx);
408 if (fclose(fmem) != 0) {
409 fprintf(stderr, "fclose (%d) \n", errno);
419 #include "common/macros.h"
421 void setstring(struct filter_parser_ctx *parser_ctx, YYSTYPE *lvalp, const char *src);
426 %parse-param {struct filter_parser_ctx *parser_ctx}
427 %parse-param {yyscan_t scanner}
428 %lex-param {yyscan_t scanner}
429 %start translation_unit
430 %token CHARACTER_CONSTANT_START SQUOTE STRING_LITERAL_START DQUOTE
431 %token ESCSEQ CHAR_STRING_TOKEN
432 %token DECIMAL_CONSTANT OCTAL_CONSTANT HEXADECIMAL_CONSTANT FLOAT_CONSTANT
433 %token LSBRAC RSBRAC LPAREN RPAREN LBRAC RBRAC RARROW
434 %token STAR PLUS MINUS
435 %token MOD_OP DIV_OP RIGHT_OP LEFT_OP
436 %token EQ_OP NE_OP LE_OP GE_OP LT_OP GT_OP AND_OP OR_OP NOT_OP
437 %token ASSIGN COLON SEMICOLON DOTDOTDOT DOT EQUAL COMMA
438 %token XOR_BIN AND_BIN OR_BIN NOT_BIN
440 %token <gs> IDENTIFIER GLOBAL_IDENTIFIER
446 struct gc_string *gs;
447 struct filter_node *n;
450 %type <gs> s_char s_char_sequence c_char c_char_sequence
452 %type <n> primary_expression
453 %type <n> prefix_expression
454 %type <n> prefix_expression_rec
455 %type <n> postfix_expression
456 %type <n> unary_expression
457 %type <n> unary_operator
458 %type <n> multiplicative_expression
459 %type <n> additive_expression
460 %type <n> shift_expression
461 %type <n> relational_expression
462 %type <n> equality_expression
463 %type <n> and_expression
464 %type <n> exclusive_or_expression
465 %type <n> inclusive_or_expression
466 %type <n> logical_and_expression
467 %type <n> logical_or_expression
469 %type <n> identifiers
/*
 * Character-constant and string-literal sequences. Each additional c_char
 * or s_char is appended to the sequence built so far through
 * gc_string_append(). Escape sequences are rejected with a parse error
 * ("escape sequences not supported yet").
 */
479 | c_char_sequence c_char
480 { $$ = gc_string_append(parser_ctx, $1, $2); }
488 parse_error(parser_ctx, "escape sequences not supported yet");
492 /* 1.6 String literals */
497 | s_char_sequence s_char
498 { $$ = gc_string_append(parser_ctx, $1, $2); }
506 parse_error(parser_ctx, "escape sequences not supported yet");
/*
 * Constant, string and parenthesized primary expressions. Every
 * alternative creates a NODE_EXPRESSION node and sets its expression type.
 *
 * Numeric constants are converted from the token text (yylval.gs->s) with
 * sscanf(), using a maximum field width taken from the
 * WIDTH_*_SCANF_IS_A_BROKEN_API macros; a conversion that does not yield
 * exactly one item raises a parse error.
 */
513 $$ = make_node(parser_ctx, NODE_EXPRESSION);
514 $$->u.expression.type = AST_EXP_CONSTANT;
515 if (sscanf(yylval.gs->s, "%" WIDTH_u64_SCANF_IS_A_BROKEN_API SCNu64,
516 &$$->u.expression.u.constant) != 1) {
517 parse_error(parser_ctx, "cannot scanf decimal constant");
522 $$ = make_node(parser_ctx, NODE_EXPRESSION);
523 $$->u.expression.type = AST_EXP_CONSTANT;
/*
 * A bare "0" is handled separately: the octal format below consumes a
 * literal leading '0' and then expects at least one more digit, so it
 * would not convert "0" on its own.
 */
524 if (!strcmp(yylval.gs->s, "0")) {
525 $$->u.expression.u.constant = 0;
526 } else if (sscanf(yylval.gs->s, "0%" WIDTH_o64_SCANF_IS_A_BROKEN_API SCNo64,
527 &$$->u.expression.u.constant) != 1) {
528 parse_error(parser_ctx, "cannot scanf octal constant");
531 | HEXADECIMAL_CONSTANT
533 $$ = make_node(parser_ctx, NODE_EXPRESSION);
534 $$->u.expression.type = AST_EXP_CONSTANT;
535 if (sscanf(yylval.gs->s, "0x%" WIDTH_x64_SCANF_IS_A_BROKEN_API SCNx64,
536 &$$->u.expression.u.constant) != 1) {
537 parse_error(parser_ctx, "cannot scanf hexadecimal constant");
542 $$ = make_node(parser_ctx, NODE_EXPRESSION);
543 $$->u.expression.type = AST_EXP_FLOAT_CONSTANT;
544 if (sscanf(yylval.gs->s, "%" WIDTH_lg_SCANF_IS_A_BROKEN_API "lg",
545 &$$->u.expression.u.float_constant) != 1) {
546 parse_error(parser_ctx, "cannot scanf float constant");
/* Empty string literal: the node points at a static empty string. */
549 | STRING_LITERAL_START DQUOTE
551 $$ = make_node(parser_ctx, NODE_EXPRESSION);
552 $$->u.expression.type = AST_EXP_STRING;
553 $$->u.expression.u.string = "";
/* Non-empty string literal: the node borrows the gc_string's character buffer. */
555 | STRING_LITERAL_START s_char_sequence DQUOTE
557 $$ = make_node(parser_ctx, NODE_EXPRESSION);
558 $$->u.expression.type = AST_EXP_STRING;
559 $$->u.expression.u.string = $2->s;
/* Character constants are represented as AST_EXP_STRING as well. */
561 | CHARACTER_CONSTANT_START c_char_sequence SQUOTE
563 $$ = make_node(parser_ctx, NODE_EXPRESSION);
564 $$->u.expression.type = AST_EXP_STRING;
565 $$->u.expression.u.string = $2->s;
/* Parenthesized expression: wrapped in an AST_EXP_NESTED node whose child is the inner expression. */
567 | LPAREN expression RPAREN
569 $$ = make_node(parser_ctx, NODE_EXPRESSION);
570 $$->u.expression.type = AST_EXP_NESTED;
571 $$->u.expression.u.child = $2;
/*
 * Identifier expressions: a NODE_EXPRESSION node typed AST_EXP_IDENTIFIER
 * or AST_EXP_GLOBAL_IDENTIFIER, whose identifier field points at the token
 * text in yylval.gs->s. The rule headers are not visible in this excerpt.
 */
578 $$ = make_node(parser_ctx, NODE_EXPRESSION);
579 $$->u.expression.type = AST_EXP_IDENTIFIER;
580 $$->u.expression.u.identifier = yylval.gs->s;
584 $$ = make_node(parser_ctx, NODE_EXPRESSION);
585 $$->u.expression.type = AST_EXP_GLOBAL_IDENTIFIER;
586 $$->u.expression.u.identifier = yylval.gs->s;
/*
 * Bracketed index chains. prefix_expression_rec matches one or more
 * "[ unary_expression ]" groups; when further groups follow, the node is
 * marked AST_LINK_BRACKET and its prev field is set to the trailing chain
 * ($4). An identifier followed by such a chain is marked AST_LINK_BRACKET
 * and keeps the chain in next_bracket. The assignments to $$ are not
 * visible in this excerpt.
 */
590 prefix_expression_rec
591 : LSBRAC unary_expression RSBRAC
595 | LSBRAC unary_expression RSBRAC prefix_expression_rec
598 $$->u.expression.pre_op = AST_LINK_BRACKET;
599 $$->u.expression.prev = $4;
608 | identifiers prefix_expression_rec
611 $$->u.expression.pre_op = AST_LINK_BRACKET;
612 $$->u.expression.next_bracket = $2;
/*
 * Field access. For "a.b" (DOT) and "a->b" (RARROW) the resulting node
 * records the link kind in post_op and points back at the left-hand
 * postfix_expression through prev. The assignment to $$ is not visible in
 * this excerpt.
 */
621 | postfix_expression DOT prefix_expression
624 $$->u.expression.post_op = AST_LINK_DOT;
625 $$->u.expression.prev = $1;
627 | postfix_expression RARROW prefix_expression
630 $$->u.expression.post_op = AST_LINK_RARROW;
631 $$->u.expression.prev = $1;
/*
 * Unary operators. unary_operator creates a NODE_UNARY_OP node typed
 * AST_UNARY_PLUS, AST_UNARY_MINUS, AST_UNARY_NOT or AST_UNARY_BIT_NOT;
 * the "unary_operator unary_expression" alternative then attaches the
 * operand as that node's child. The tokens selecting each operator are not
 * visible in this excerpt.
 */
640 | unary_operator unary_expression
643 $$->u.unary_op.child = $2;
650 $$ = make_node(parser_ctx, NODE_UNARY_OP);
651 $$->u.unary_op.type = AST_UNARY_PLUS;
655 $$ = make_node(parser_ctx, NODE_UNARY_OP);
656 $$->u.unary_op.type = AST_UNARY_MINUS;
660 $$ = make_node(parser_ctx, NODE_UNARY_OP);
661 $$->u.unary_op.type = AST_UNARY_NOT;
665 $$ = make_node(parser_ctx, NODE_UNARY_OP);
666 $$->u.unary_op.type = AST_UNARY_BIT_NOT;
/*
 * Binary operator precedence chain, from tightest to loosest binding as
 * far as the visible alternatives show:
 *
 *   multiplicative (* / %) -> additive (+ -) -> shift (<< >>) ->
 *   bitwise and -> bitwise xor -> bitwise or ->
 *   relational (< > <= >=) -> equality (== !=) ->
 *   logical and -> logical or
 *
 * Every rule is left-recursive, so operators of the same level associate
 * to the left, and each binary alternative builds its node with
 * make_op_node(). Note that relational_expression is built on
 * inclusive_or_expression: the bitwise operators therefore bind tighter
 * than the comparison operators, which differs from C. The final action
 * stores the parsed expression as the child of the AST root.
 */
670 multiplicative_expression
673 | multiplicative_expression STAR unary_expression
675 $$ = make_op_node(parser_ctx, AST_OP_MUL, $1, $3);
677 | multiplicative_expression DIV_OP unary_expression
679 $$ = make_op_node(parser_ctx, AST_OP_DIV, $1, $3);
681 | multiplicative_expression MOD_OP unary_expression
683 $$ = make_op_node(parser_ctx, AST_OP_MOD, $1, $3);
688 : multiplicative_expression
690 | additive_expression PLUS multiplicative_expression
692 $$ = make_op_node(parser_ctx, AST_OP_PLUS, $1, $3);
694 | additive_expression MINUS multiplicative_expression
696 $$ = make_op_node(parser_ctx, AST_OP_MINUS, $1, $3);
701 : additive_expression
703 | shift_expression LEFT_OP additive_expression
705 $$ = make_op_node(parser_ctx, AST_OP_BIT_LSHIFT, $1, $3);
707 | shift_expression RIGHT_OP additive_expression
709 $$ = make_op_node(parser_ctx, AST_OP_BIT_RSHIFT, $1, $3);
716 | and_expression AND_BIN shift_expression
718 $$ = make_op_node(parser_ctx, AST_OP_BIT_AND, $1, $3);
722 exclusive_or_expression
725 | exclusive_or_expression XOR_BIN and_expression
727 $$ = make_op_node(parser_ctx, AST_OP_BIT_XOR, $1, $3);
731 inclusive_or_expression
732 : exclusive_or_expression
734 | inclusive_or_expression OR_BIN exclusive_or_expression
736 $$ = make_op_node(parser_ctx, AST_OP_BIT_OR, $1, $3);
740 relational_expression
741 : inclusive_or_expression
743 | relational_expression LT_OP inclusive_or_expression
745 $$ = make_op_node(parser_ctx, AST_OP_LT, $1, $3);
747 | relational_expression GT_OP inclusive_or_expression
749 $$ = make_op_node(parser_ctx, AST_OP_GT, $1, $3);
751 | relational_expression LE_OP inclusive_or_expression
753 $$ = make_op_node(parser_ctx, AST_OP_LE, $1, $3);
755 | relational_expression GE_OP inclusive_or_expression
757 $$ = make_op_node(parser_ctx, AST_OP_GE, $1, $3);
762 : relational_expression
764 | equality_expression EQ_OP relational_expression
766 $$ = make_op_node(parser_ctx, AST_OP_EQ, $1, $3);
768 | equality_expression NE_OP relational_expression
770 $$ = make_op_node(parser_ctx, AST_OP_NE, $1, $3);
774 logical_and_expression
775 : equality_expression
777 | logical_and_expression AND_OP equality_expression
779 $$ = make_op_node(parser_ctx, AST_OP_AND, $1, $3);
783 logical_or_expression
784 : logical_and_expression
786 | logical_or_expression OR_OP logical_and_expression
788 $$ = make_op_node(parser_ctx, AST_OP_OR, $1, $3);
793 : logical_or_expression
800 parser_ctx->ast->root.u.root.child = $1;