5 * LTTng filter expression parser
7 * Copyright 2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
9 * SPDX-License-Identifier: LGPL-2.1-only
11 * Grammar inspired from http://www.quut.com/c/ANSI-C-grammar-y.html
19 #include "common/bytecode/bytecode.hpp"
20 #include "filter-ast.hpp"
21 #include "filter-parser.hpp"
22 #include "memstream.hpp"
24 #include <common/compat/errno.hpp>
25 #include <common/macros.hpp>
/*
 * Maximum field widths pasted into the sscanf() format strings that the
 * grammar actions use to convert decimal, octal, hexadecimal and
 * floating-point constants, so that each conversion consumes a bounded
 * number of characters.
 * NOTE(review): a 64-bit value needs at most 16 hexadecimal digits, yet the
 * hexadecimal width is 17 -- confirm this is intentional.
 */
27 #define WIDTH_u64_SCANF_IS_A_BROKEN_API "20"
28 #define WIDTH_o64_SCANF_IS_A_BROKEN_API "22"
29 #define WIDTH_x64_SCANF_IS_A_BROKEN_API "17"
30 #define WIDTH_lg_SCANF_IS_A_BROKEN_API "4096" /* Hugely optimistic approximation */
33 * bison 3.8.2 produces a parser with unused-but-set variables, which results in warnings
37 #pragma clang diagnostic ignored "-Wunused-but-set-variable"
38 #endif /* __clang__ */
/*
 * Debug helpers. Two alternative definitions of print_xml and dbg_printf()
 * follow. This first variant sets print_xml to 1 and makes dbg_printf()
 * print its message prefixed with "[debug filter_parser] ".
 * NOTE(review): presumably a preprocessor conditional selects one of the two
 * variants; confirm which macro controls it.
 */
41 static const int print_xml = 1;
42 #define dbg_printf(fmt, args...) \
43 printf("[debug filter_parser] " fmt, ## args)
/*
 * Second variant: print_xml is 0 and, per the macro's own comment, the
 * printf() call is only there so the compiler checks the format string.
 */
45 static const int print_xml = 0;
46 #define dbg_printf(fmt, args...) \
48 /* do nothing but check printf format */ \
50 printf("[debug filter_parser] " fmt, ## args); \
/* Copied into yydebug each time filter_parser_ctx_alloc() runs. */
54 int filter_parser_debug = 0;
/*
 * Prototypes of the yy* parser and scanner functions. yyparse(),
 * yylex_init_extra(), yylex_destroy() and yyrestart() are called further
 * down in this file.
 */
56 int yyparse(struct filter_parser_ctx *parser_ctx, yyscan_t scanner);
57 int yylex(union YYSTYPE *yyval, yyscan_t scanner);
58 int yylex_init_extra(struct filter_parser_ctx *parser_ctx, yyscan_t * ptr_yy_globals);
59 int yylex_destroy(yyscan_t yyparser_ctx);
60 void yyrestart(FILE * in_str, yyscan_t parser_ctx);
63 struct cds_list_head gc;
/*
 * Return a constant string naming the given node type, for example
 * "NODE_EXPRESSION" or "NODE_UNARY_OP"; "NODE_UNKNOWN" is one of the
 * possible results.
 */
69 const char *node_type_str(enum node_type type)
73 return "NODE_UNKNOWN";
77 return "NODE_EXPRESSION";
81 return "NODE_UNARY_OP";
/*
 * Return the name of a node's type. When node->type is below NR_NODE_TYPES
 * the name comes from node_type_str().
 */
89 const char *node_type(struct filter_node *node)
91 if (node->type < NR_NODE_TYPES)
92 return node_type_str(node->type);
/*
 * Allocate a gc_string with room for len bytes and register it on
 * parser_ctx->allocated_strings. The allocation size search starts at 8 and
 * stops once the size is at least sizeof(long) + sizeof(*gstr) + len; the
 * size that was chosen is recorded in gstr->alloclen.
 */
97 static struct gc_string *gc_string_alloc(struct filter_parser_ctx *parser_ctx,
100 struct gc_string *gstr;
103 /* TODO: could be faster with find first bit or glib Gstring */
104 /* sizeof long to account for malloc header (int or long ?) */
105 for (alloclen = 8; alloclen < sizeof(long) + sizeof(*gstr) + len;
108 gstr = zmalloc<gc_string>(alloclen);
/* Track the string on the context so it can be released with it. */
112 cds_list_add(&gstr->gc, &parser_ctx->allocated_strings);
113 gstr->alloclen = alloclen;
119 * note: never use gc_string_append on a string that has external references.
120 * gsrc will be garbage collected immediately, and gstr might be.
121 * Should only be used to append characters to a string literal or constant.
/*
 * Concatenate gsrc->s onto gstr->s. newlen counts both strings plus the
 * terminating NUL. When the allocation size computed for newlen exceeds
 * gstr->alloclen, a fresh gc_string is allocated, filled with both strings,
 * and gstr is unlinked from its gc list; the other strcat() below appends
 * into gstr->s directly. gsrc is unlinked from its gc list.
 */
124 struct gc_string *gc_string_append(struct filter_parser_ctx *parser_ctx,
125 struct gc_string *gstr,
126 struct gc_string *gsrc)
128 size_t newlen = strlen(gsrc->s) + strlen(gstr->s) + 1;
131 /* TODO: could be faster with find first bit or glib Gstring */
132 /* sizeof long to account for malloc header (int or long ?) */
133 for (alloclen = 8; alloclen < sizeof(long) + sizeof(*gstr) + newlen;
/* Only reallocate when the combined string needs a larger allocation. */
136 if (alloclen > gstr->alloclen) {
137 struct gc_string *newgstr;
/*
 * NOTE(review): newgstr is written to by strcpy() right away; confirm that
 * gc_string_alloc() cannot return NULL here.
 */
139 newgstr = gc_string_alloc(parser_ctx, newlen);
140 strcpy(newgstr->s, gstr->s);
141 strcat(newgstr->s, gsrc->s);
142 cds_list_del(&gstr->gc);
146 strcat(gstr->s, gsrc->s);
148 cds_list_del(&gsrc->gc);
/*
 * Store a copy of src in lvalp->gs: a gc_string of strlen(src) + 1 bytes is
 * allocated through gc_string_alloc() and src is copied into it.
 * NOTE(review): the allocation result is passed to strcpy() directly;
 * confirm that gc_string_alloc() cannot return NULL.
 */
153 void setstring(struct filter_parser_ctx *parser_ctx, YYSTYPE *lvalp, const char *src)
155 lvalp->gs = gc_string_alloc(parser_ctx, strlen(src) + 1);
156 strcpy(lvalp->gs->s, src);
/*
 * Allocate a filter_node and link it on the allocated_nodes list of the AST
 * obtained from filter_parser_get_ast(). Error messages are printed to
 * stderr for an attempt to create a root node and for an unknown node type.
 */
159 static struct filter_node *make_node(struct filter_parser_ctx *scanner,
162 struct filter_ast *ast = filter_parser_get_ast(scanner);
163 struct filter_node *node;
165 node = zmalloc<filter_node>();
/*
 * NOTE(review): zmalloc() presumably returns zeroed memory already, which
 * would make this memset() redundant -- confirm.
 */
168 memset(node, 0, sizeof(*node));
/* Track the node on the AST so it can be released with it. */
170 cds_list_add(&node->gc, &ast->allocated_nodes);
174 fprintf(stderr, "[error] %s: trying to create root node\n", __func__);
177 case NODE_EXPRESSION:
186 fprintf(stderr, "[error] %s: unknown node type %d\n", __func__,
/*
 * Allocate a NODE_OP filter_node for a binary operator, link it on the
 * allocated_nodes list of the AST obtained from filter_parser_get_ast(), and
 * record the operator type together with its left and right children.
 */
194 static struct filter_node *make_op_node(struct filter_parser_ctx *scanner,
196 struct filter_node *lchild,
197 struct filter_node *rchild)
199 struct filter_ast *ast = filter_parser_get_ast(scanner);
200 struct filter_node *node;
202 node = zmalloc<filter_node>();
/*
 * NOTE(review): zmalloc() presumably returns zeroed memory already, which
 * would make this memset() redundant -- confirm.
 */
205 memset(node, 0, sizeof(*node));
206 node->type = NODE_OP;
207 cds_list_add(&node->gc, &ast->allocated_nodes);
208 node->u.op.type = type;
209 node->u.op.lchild = lchild;
210 node->u.op.rchild = rchild;
/* Error reporting hook: prints "error <str>" followed by a newline to stderr. */
215 void yyerror(struct filter_parser_ctx *parser_ctx, yyscan_t scanner, const char *str)
217 fprintf(stderr, "error %s\n", str);
/*
 * Report a parse error through yyerror(), using the scanner stored in
 * parser_ctx. str must be a string literal: it is joined with the
 * "parse error: " prefix and a trailing newline by literal concatenation.
 */
220 #define parse_error(parser_ctx, str) \
222 yyerror(parser_ctx, parser_ctx->scanner, YY_("parse error: " str "\n")); \
/*
 * Walk every gc_string linked on list through its gc member. The _safe
 * iterator is used, which allows the current entry to be removed while
 * iterating.
 * NOTE(review): presumably each entry is freed in the loop body; confirm.
 */
226 static void free_strings(struct cds_list_head *list)
228 struct gc_string *gstr, *tmp;
230 cds_list_for_each_entry_safe(gstr, tmp, list, gc)
/*
 * Allocate a filter_ast, initialise its allocated_nodes list as empty and
 * mark its embedded root node as NODE_ROOT.
 */
234 static struct filter_ast *filter_ast_alloc(void)
236 struct filter_ast *ast;
238 ast = zmalloc<filter_ast>();
/*
 * NOTE(review): zmalloc() presumably returns zeroed memory already, which
 * would make this memset() redundant -- confirm.
 */
241 memset(ast, 0, sizeof(*ast));
242 CDS_INIT_LIST_HEAD(&ast->allocated_nodes);
243 ast->root.type = NODE_ROOT;
/*
 * Walk every filter_node linked on ast->allocated_nodes through its gc
 * member, using the _safe iterator so the current entry may be removed.
 * NOTE(review): presumably each node, then the AST itself, is freed; confirm.
 */
247 static void filter_ast_free(struct filter_ast *ast)
249 struct filter_node *node, *tmp;
251 cds_list_for_each_entry_safe(node, tmp, &ast->allocated_nodes, gc)
/*
 * Run the parser on the context's scanner and return yyparse()'s result
 * unchanged.
 */
256 int filter_parser_ctx_append_ast(struct filter_parser_ctx *parser_ctx)
258 return yyparse(parser_ctx, parser_ctx->scanner);
/*
 * Create a parser context that reads from input. filter_parser_debug is
 * copied into yydebug, the context is allocated, a scanner is initialised
 * with the context passed to yylex_init_extra(), the scanner is pointed at
 * input, and the AST and the allocated_strings list are set up.
 */
261 struct filter_parser_ctx *filter_parser_ctx_alloc(FILE *input)
263 struct filter_parser_ctx *parser_ctx;
266 yydebug = filter_parser_debug;
268 parser_ctx = zmalloc<filter_parser_ctx>();
/*
 * NOTE(review): zmalloc() presumably returns zeroed memory already, which
 * would make this memset() redundant -- confirm.
 */
271 memset(parser_ctx, 0, sizeof(*parser_ctx));
273 ret = yylex_init_extra(parser_ctx, &parser_ctx->scanner);
275 fprintf(stderr, "yylex_init error\n");
276 goto cleanup_parser_ctx;
278 /* Start processing new stream */
279 yyrestart(input, parser_ctx->scanner);
281 parser_ctx->ast = filter_ast_alloc();
282 if (!parser_ctx->ast)
284 CDS_INIT_LIST_HEAD(&parser_ctx->allocated_strings);
/* Report on stdout whether input is an interactive terminal. */
287 fprintf(stdout, "parser_ctx input is a%s.\n",
288 isatty(fileno(input)) ? "n interactive tty" :
289 " noninteractive file");
/* NOTE(review): presumably the cleanup path taken once the scanner exists. */
294 ret = yylex_destroy(parser_ctx->scanner);
296 fprintf(stderr, "yylex_destroy error\n");
/*
 * Tear down a parser context: destroy the scanner (a failure is only
 * reported on stderr), then release the AST, the garbage-collected strings,
 * the IR, and the bytecode and bytecode relocation buffers.
 */
302 void filter_parser_ctx_free(struct filter_parser_ctx *parser_ctx)
306 ret = yylex_destroy(parser_ctx->scanner);
308 fprintf(stderr, "yylex_destroy error\n");
310 filter_ast_free(parser_ctx->ast);
311 free_strings(&parser_ctx->allocated_strings);
312 filter_ir_free(parser_ctx);
313 free(parser_ctx->bytecode);
314 free(parser_ctx->bytecode_reloc);
/*
 * Build a parser context from a filter expression string. The string is
 * opened as a read-only memory stream, parsed into an AST, turned into IR,
 * checked and normalized by the IR visitors, and finally compiled to
 * bytecode. Failures visible here are reported as -LTTNG_ERR_FILTER_NOMEM
 * (opening the stream, allocating the parser) or -LTTNG_ERR_FILTER_INVAL
 * (every later stage, including a failing fclose() on the success path).
 *
 * filter_expression must not be NULL (asserted).
 */
319 int filter_parser_ctx_create_from_filter_expression(
320 const char *filter_expression, struct filter_parser_ctx **ctxp)
323 struct filter_parser_ctx *ctx = NULL;
326 LTTNG_ASSERT(filter_expression);
330 * Casting const to non-const, as the underlying function will use it in
333 fmem = lttng_fmemopen((void *) filter_expression,
334 strlen(filter_expression), "r");
336 fprintf(stderr, "Error opening memory as stream\n");
337 ret = -LTTNG_ERR_FILTER_NOMEM;
/* Stage 1: create the parser context on top of the memory stream. */
340 ctx = filter_parser_ctx_alloc(fmem);
342 fprintf(stderr, "Error allocating parser\n");
343 ret = -LTTNG_ERR_FILTER_NOMEM;
344 goto filter_alloc_error;
/* Stage 2: parse the expression into the context's AST. */
346 ret = filter_parser_ctx_append_ast(ctx);
348 fprintf(stderr, "Parse error\n");
349 ret = -LTTNG_ERR_FILTER_INVAL;
/* Dump the AST as XML on stdout. */
353 ret = filter_visitor_print_xml(ctx, stdout, 0);
356 fprintf(stderr, "XML print error\n");
357 ret = -LTTNG_ERR_FILTER_INVAL;
/* Stage 3: generate the intermediate representation. */
362 dbg_printf("Generating IR... ");
364 ret = filter_visitor_ir_generate(ctx);
366 fprintf(stderr, "Generate IR error\n");
367 ret = -LTTNG_ERR_FILTER_INVAL;
370 dbg_printf("done\n");
/* Stage 4: IR validation and normalization passes. */
372 dbg_printf("Validating IR... ");
374 ret = filter_visitor_ir_check_binary_op_nesting(ctx);
376 ret = -LTTNG_ERR_FILTER_INVAL;
380 /* Normalize globbing patterns in the expression. */
381 ret = filter_visitor_ir_normalize_glob_patterns(ctx);
383 ret = -LTTNG_ERR_FILTER_INVAL;
387 /* Validate strings used as literals in the expression. */
388 ret = filter_visitor_ir_validate_string(ctx);
390 ret = -LTTNG_ERR_FILTER_INVAL;
394 /* Validate globbing patterns in the expression. */
395 ret = filter_visitor_ir_validate_globbing(ctx);
397 ret = -LTTNG_ERR_FILTER_INVAL;
401 dbg_printf("done\n");
/* Stage 5: generate the bytecode. */
403 dbg_printf("Generating bytecode... ");
405 ret = filter_visitor_bytecode_generate(ctx);
407 fprintf(stderr, "Generate bytecode error\n");
408 ret = -LTTNG_ERR_FILTER_INVAL;
411 dbg_printf("done\n");
412 dbg_printf("Size of bytecode generated: %u bytes.\n",
413 bytecode_get_len(&ctx->bytecode->b));
415 /* No need to keep the memory stream. */
416 if (fclose(fmem) != 0) {
417 fprintf(stderr, "fclose (%d) \n", errno);
418 ret = -LTTNG_ERR_FILTER_INVAL;
/* The context is freed, then the memory stream is closed. */
426 filter_parser_ctx_free(ctx);
428 if (fclose(fmem) != 0) {
429 fprintf(stderr, "fclose (%d) \n", errno);
439 #include "common/macros.hpp"
/* Stores a gc_string copy of src in lvalp->gs; defined earlier in this file. */
441 void setstring(struct filter_parser_ctx *parser_ctx, YYSTYPE *lvalp, const char *src);
/*
 * yyparse() receives the parser context and the scanner as extra
 * parameters; the scanner is also handed to yylex().
 */
446 %parse-param {struct filter_parser_ctx *parser_ctx}
447 %parse-param {yyscan_t scanner}
448 %lex-param {yyscan_t scanner}
449 %start translation_unit
/* Terminal symbols declared without a semantic value type. */
450 %token CHARACTER_CONSTANT_START SQUOTE STRING_LITERAL_START DQUOTE
451 %token ESCSEQ CHAR_STRING_TOKEN
452 %token DECIMAL_CONSTANT OCTAL_CONSTANT HEXADECIMAL_CONSTANT FLOAT_CONSTANT
453 %token LSBRAC RSBRAC LPAREN RPAREN LBRAC RBRAC RARROW
454 %token STAR PLUS MINUS
455 %token MOD_OP DIV_OP RIGHT_OP LEFT_OP
456 %token EQ_OP NE_OP LE_OP GE_OP LT_OP GT_OP AND_OP OR_OP NOT_OP
457 %token ASSIGN COLON SEMICOLON DOTDOTDOT DOT EQUAL COMMA
458 %token XOR_BIN AND_BIN OR_BIN NOT_BIN
/* Identifier tokens carry their text as a gc_string. */
460 %token <gs> IDENTIFIER GLOBAL_IDENTIFIER
/* Semantic value members: gs for strings, n for AST nodes. */
466 struct gc_string *gs;
467 struct filter_node *n;
/* Character and string pieces are gc_strings. */
470 %type <gs> s_char s_char_sequence c_char c_char_sequence
/* Every expression-level nonterminal yields an AST node. */
472 %type <n> primary_expression
473 %type <n> prefix_expression
474 %type <n> prefix_expression_rec
475 %type <n> postfix_expression
476 %type <n> unary_expression
477 %type <n> unary_operator
478 %type <n> multiplicative_expression
479 %type <n> additive_expression
480 %type <n> shift_expression
481 %type <n> relational_expression
482 %type <n> equality_expression
483 %type <n> and_expression
484 %type <n> exclusive_or_expression
485 %type <n> inclusive_or_expression
486 %type <n> logical_and_expression
487 %type <n> logical_or_expression
489 %type <n> identifiers
/*
 * Character-constant contents: each further c_char is appended to the
 * sequence built so far with gc_string_append().
 */
499 | c_char_sequence c_char
500 { $$ = gc_string_append(parser_ctx, $1, $2); }
/* Escape sequences are rejected with a parse error. */
508 parse_error(parser_ctx, "escape sequences not supported yet");
512 /* 1.6 String literals */
/* String-literal contents are accumulated the same way, one s_char at a time. */
517 | s_char_sequence s_char
518 { $$ = gc_string_append(parser_ctx, $1, $2); }
/* Escape sequences are rejected with a parse error. */
526 parse_error(parser_ctx, "escape sequences not supported yet");
/*
 * Decimal constant: the token text is converted with a width-limited
 * sscanf() into u.constant; a failed conversion is a parse error.
 */
533 $$ = make_node(parser_ctx, NODE_EXPRESSION);
534 $$->u.expression.type = AST_EXP_CONSTANT;
535 if (sscanf(yylval.gs->s, "%" WIDTH_u64_SCANF_IS_A_BROKEN_API SCNu64,
536 &$$->u.expression.u.constant) != 1) {
537 parse_error(parser_ctx, "cannot scanf decimal constant");
/*
 * Octal constant: the text "0" is handled on its own; otherwise the format
 * matches a leading '0' and converts the remaining octal digits.
 */
542 $$ = make_node(parser_ctx, NODE_EXPRESSION);
543 $$->u.expression.type = AST_EXP_CONSTANT;
544 if (!strcmp(yylval.gs->s, "0")) {
545 $$->u.expression.u.constant = 0;
546 } else if (sscanf(yylval.gs->s, "0%" WIDTH_o64_SCANF_IS_A_BROKEN_API SCNo64,
547 &$$->u.expression.u.constant) != 1) {
548 parse_error(parser_ctx, "cannot scanf octal constant");
/* Hexadecimal constant: the format matches "0x" and converts the hex digits after it. */
551 | HEXADECIMAL_CONSTANT
553 $$ = make_node(parser_ctx, NODE_EXPRESSION);
554 $$->u.expression.type = AST_EXP_CONSTANT;
555 if (sscanf(yylval.gs->s, "0x%" WIDTH_x64_SCANF_IS_A_BROKEN_API SCNx64,
556 &$$->u.expression.u.constant) != 1) {
557 parse_error(parser_ctx, "cannot scanf hexadecimal constant");
/* Floating-point constant: converted with "%lg" into u.float_constant. */
562 $$ = make_node(parser_ctx, NODE_EXPRESSION);
563 $$->u.expression.type = AST_EXP_FLOAT_CONSTANT;
564 if (sscanf(yylval.gs->s, "%" WIDTH_lg_SCANF_IS_A_BROKEN_API "lg",
565 &$$->u.expression.u.float_constant) != 1) {
566 parse_error(parser_ctx, "cannot scanf float constant");
/* Empty string literal: the node points at a static empty string. */
569 | STRING_LITERAL_START DQUOTE
571 $$ = make_node(parser_ctx, NODE_EXPRESSION);
572 $$->u.expression.type = AST_EXP_STRING;
573 $$->u.expression.u.string = "";
/* Non-empty string literal: the node borrows the gc_string's buffer. */
575 | STRING_LITERAL_START s_char_sequence DQUOTE
577 $$ = make_node(parser_ctx, NODE_EXPRESSION);
578 $$->u.expression.type = AST_EXP_STRING;
579 $$->u.expression.u.string = $2->s;
/* Character constants are also represented as AST_EXP_STRING nodes. */
581 | CHARACTER_CONSTANT_START c_char_sequence SQUOTE
583 $$ = make_node(parser_ctx, NODE_EXPRESSION);
584 $$->u.expression.type = AST_EXP_STRING;
585 $$->u.expression.u.string = $2->s;
/* Parenthesised expression: wrapped in an AST_EXP_NESTED node. */
587 | LPAREN expression RPAREN
589 $$ = make_node(parser_ctx, NODE_EXPRESSION);
590 $$->u.expression.type = AST_EXP_NESTED;
591 $$->u.expression.u.child = $2;
/* Identifier: the node borrows the token's gc_string buffer. */
598 $$ = make_node(parser_ctx, NODE_EXPRESSION);
599 $$->u.expression.type = AST_EXP_IDENTIFIER;
600 $$->u.expression.u.identifier = yylval.gs->s;
/* Global identifier: same as above with the AST_EXP_GLOBAL_IDENTIFIER type. */
604 $$ = make_node(parser_ctx, NODE_EXPRESSION);
605 $$->u.expression.type = AST_EXP_GLOBAL_IDENTIFIER;
606 $$->u.expression.u.identifier = yylval.gs->s;
/*
 * One or more bracketed index groups, "[" unary_expression "]". When more
 * groups follow, the node is tagged AST_LINK_BRACKET and its prev points at
 * the rest of the chain ($4).
 */
610 prefix_expression_rec
611 : LSBRAC unary_expression RSBRAC
615 | LSBRAC unary_expression RSBRAC prefix_expression_rec
618 $$->u.expression.pre_op = AST_LINK_BRACKET;
619 $$->u.expression.prev = $4;
/* An identifier followed by index groups: the chain is attached as next_bracket. */
628 | identifiers prefix_expression_rec
631 $$->u.expression.pre_op = AST_LINK_BRACKET;
632 $$->u.expression.next_bracket = $2;
/* Field access with ".": the left-hand expression is recorded in prev. */
641 | postfix_expression DOT prefix_expression
644 $$->u.expression.post_op = AST_LINK_DOT;
645 $$->u.expression.prev = $1;
/* Field access through the RARROW token: same linkage, tagged AST_LINK_RARROW. */
647 | postfix_expression RARROW prefix_expression
650 $$->u.expression.post_op = AST_LINK_RARROW;
651 $$->u.expression.prev = $1;
/* Unary operator applied to an operand: the operand becomes the node's child. */
660 | unary_operator unary_expression
663 $$->u.unary_op.child = $2;
/*
 * Each alternative creates a NODE_UNARY_OP node tagged with one of
 * AST_UNARY_PLUS, AST_UNARY_MINUS, AST_UNARY_NOT or AST_UNARY_BIT_NOT.
 */
670 $$ = make_node(parser_ctx, NODE_UNARY_OP);
671 $$->u.unary_op.type = AST_UNARY_PLUS;
675 $$ = make_node(parser_ctx, NODE_UNARY_OP);
676 $$->u.unary_op.type = AST_UNARY_MINUS;
680 $$ = make_node(parser_ctx, NODE_UNARY_OP);
681 $$->u.unary_op.type = AST_UNARY_NOT;
685 $$ = make_node(parser_ctx, NODE_UNARY_OP);
686 $$->u.unary_op.type = AST_UNARY_BIT_NOT;
/*
 * Binary operators. Every rule is left-recursive, so operators of equal
 * precedence group to the left, and each action builds a NODE_OP node via
 * make_op_node(). Precedence, from tightest to loosest, follows the rule
 * nesting below: multiplicative, additive, shift, bitwise AND, bitwise XOR,
 * bitwise OR, relational, equality, logical AND, logical OR.
 */
690 multiplicative_expression
693 | multiplicative_expression STAR unary_expression
695 $$ = make_op_node(parser_ctx, AST_OP_MUL, $1, $3);
697 | multiplicative_expression DIV_OP unary_expression
699 $$ = make_op_node(parser_ctx, AST_OP_DIV, $1, $3);
701 | multiplicative_expression MOD_OP unary_expression
703 $$ = make_op_node(parser_ctx, AST_OP_MOD, $1, $3);
708 : multiplicative_expression
710 | additive_expression PLUS multiplicative_expression
712 $$ = make_op_node(parser_ctx, AST_OP_PLUS, $1, $3);
714 | additive_expression MINUS multiplicative_expression
716 $$ = make_op_node(parser_ctx, AST_OP_MINUS, $1, $3);
721 : additive_expression
723 | shift_expression LEFT_OP additive_expression
725 $$ = make_op_node(parser_ctx, AST_OP_BIT_LSHIFT, $1, $3);
727 | shift_expression RIGHT_OP additive_expression
729 $$ = make_op_node(parser_ctx, AST_OP_BIT_RSHIFT, $1, $3);
/* Bitwise AND takes shift expressions directly as operands. */
736 | and_expression AND_BIN shift_expression
738 $$ = make_op_node(parser_ctx, AST_OP_BIT_AND, $1, $3);
742 exclusive_or_expression
745 | exclusive_or_expression XOR_BIN and_expression
747 $$ = make_op_node(parser_ctx, AST_OP_BIT_XOR, $1, $3);
751 inclusive_or_expression
752 : exclusive_or_expression
754 | inclusive_or_expression OR_BIN exclusive_or_expression
756 $$ = make_op_node(parser_ctx, AST_OP_BIT_OR, $1, $3);
/*
 * Relational operators take bitwise-OR expressions as operands, so the
 * bitwise operators bind tighter than comparisons in this grammar.
 */
760 relational_expression
761 : inclusive_or_expression
763 | relational_expression LT_OP inclusive_or_expression
765 $$ = make_op_node(parser_ctx, AST_OP_LT, $1, $3);
767 | relational_expression GT_OP inclusive_or_expression
769 $$ = make_op_node(parser_ctx, AST_OP_GT, $1, $3);
771 | relational_expression LE_OP inclusive_or_expression
773 $$ = make_op_node(parser_ctx, AST_OP_LE, $1, $3);
775 | relational_expression GE_OP inclusive_or_expression
777 $$ = make_op_node(parser_ctx, AST_OP_GE, $1, $3);
782 : relational_expression
784 | equality_expression EQ_OP relational_expression
786 $$ = make_op_node(parser_ctx, AST_OP_EQ, $1, $3);
788 | equality_expression NE_OP relational_expression
790 $$ = make_op_node(parser_ctx, AST_OP_NE, $1, $3);
794 logical_and_expression
795 : equality_expression
797 | logical_and_expression AND_OP equality_expression
799 $$ = make_op_node(parser_ctx, AST_OP_AND, $1, $3);
803 logical_or_expression
804 : logical_and_expression
806 | logical_or_expression OR_OP logical_and_expression
808 $$ = make_op_node(parser_ctx, AST_OP_OR, $1, $3);
813 : logical_or_expression
/* The parsed expression becomes the child of the AST root node. */
820 parser_ctx->ast->root.u.root.child = $1;