common: move bytecode utilities from filter to its own file
[lttng-tools.git] src/common/filter/filter-visitor-generate-bytecode.c
/*
 * filter-visitor-generate-bytecode.c
 *
 * LTTng filter bytecode generation
 *
 * Copyright 2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * SPDX-License-Identifier: LGPL-2.1-only
 *
 */

#include <stdlib.h>
#include <string.h>
#include <common/align.h>
#include <common/compat/errno.h>
#include <common/compat/string.h>

#include "common/align.h"
#include "common/bytecode/bytecode.h"
#include "common/compat/string.h"
#include "common/macros.h"
#include "filter-ast.h"
#include "filter-ir.h"

#ifndef max_t
#define max_t(type, a, b)	((type) ((a) > (b) ? (a) : (b)))
#endif

static
int recursive_visit_gen_bytecode(struct filter_parser_ctx *ctx,
		struct ir_op *node);

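/*
 * Patch `len` bytes of already-emitted bytecode at `offset` with `data`.
 * Used to backpatch the skip offset of logical (AND/OR) instructions once
 * the jump target is known.
 */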
static
int bytecode_patch(struct lttng_filter_bytecode_alloc **fb,
		const void *data,
		uint16_t offset,
		uint32_t len)
{
	if (offset >= (*fb)->b.len) {
		return -EINVAL;
	}
	memcpy(&(*fb)->b.data[offset], data, len);
	return 0;
}

static
int visit_node_root(struct filter_parser_ctx *ctx, struct ir_op *node)
{
	int ret;
	struct return_op insn;

	/* Visit child */
	ret = recursive_visit_gen_bytecode(ctx, node->u.root.child);
	if (ret)
		return ret;

	/* Generate end of bytecode instruction */
	insn.op = FILTER_OP_RETURN;
	return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
}

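/*
 * Append `append` to the heap-allocated string `*s`, reallocating as needed.
 * `*s` may be NULL on entry; on success the old buffer is freed and `*s`
 * points to the concatenated string.
 */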
static
int append_str(char **s, const char *append)
{
	char *old = *s;
	char *new;
	size_t oldlen = (old == NULL) ? 0 : strlen(old);
	size_t appendlen = strlen(append);

	new = calloc(oldlen + appendlen + 1, 1);
	if (!new) {
		return -ENOMEM;
	}
	if (oldlen) {
		strcpy(new, old);
	}
	strcat(new, append);
	*s = new;
	free(old);
	return 0;
}

/*
 * 1: match
 * 0: no match
 * < 0: error
 */
static
int load_expression_legacy_match(const struct ir_load_expression *exp,
		enum filter_op *op_type,
		char **symbol)
{
	const struct ir_load_expression_op *op;
	bool need_dot = false;

	op = exp->child;
	switch (op->type) {
	case IR_LOAD_EXPRESSION_GET_CONTEXT_ROOT:
		*op_type = FILTER_OP_GET_CONTEXT_REF;
		if (append_str(symbol, "$ctx.")) {
			return -ENOMEM;
		}
		need_dot = false;
		break;
	case IR_LOAD_EXPRESSION_GET_APP_CONTEXT_ROOT:
		*op_type = FILTER_OP_GET_CONTEXT_REF;
		if (append_str(symbol, "$app.")) {
			return -ENOMEM;
		}
		need_dot = false;
		break;
	case IR_LOAD_EXPRESSION_GET_PAYLOAD_ROOT:
		*op_type = FILTER_OP_LOAD_FIELD_REF;
		need_dot = false;
		break;

	case IR_LOAD_EXPRESSION_GET_SYMBOL:
	case IR_LOAD_EXPRESSION_GET_INDEX:
	case IR_LOAD_EXPRESSION_LOAD_FIELD:
	default:
		return 0; /* no match */
	}

	for (;;) {
		op = op->next;
		if (!op) {
			return 0; /* no match */
		}
		switch (op->type) {
		case IR_LOAD_EXPRESSION_LOAD_FIELD:
			goto end;
		case IR_LOAD_EXPRESSION_GET_SYMBOL:
			if (need_dot && append_str(symbol, ".")) {
				return -ENOMEM;
			}
			if (append_str(symbol, op->u.symbol)) {
				return -ENOMEM;
			}
			break;
		default:
			return 0; /* no match */
		}
		need_dot = true;
	}
end:
	return 1; /* Legacy match */
}

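/*
 * Emit a legacy field/context reference: a single load instruction whose
 * field_ref offset is left unresolved ((uint16_t) -1) and fixed up later
 * through the relocation table, to which the instruction offset and the
 * accumulated symbol name are appended.
 */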
/*
 * 1: legacy match
 * 0: no legacy match
 * < 0: error
 */
static
int visit_node_load_expression_legacy(struct filter_parser_ctx *ctx,
		const struct ir_load_expression *exp,
		const struct ir_load_expression_op *op)
{
	struct load_op *insn = NULL;
	uint32_t insn_len = sizeof(struct load_op)
			+ sizeof(struct field_ref);
	struct field_ref ref_offset;
	uint32_t reloc_offset_u32;
	uint16_t reloc_offset;
	enum filter_op op_type;
	char *symbol = NULL;
	int ret;

	ret = load_expression_legacy_match(exp, &op_type, &symbol);
	if (ret <= 0) {
		goto end;
	}
	insn = calloc(insn_len, 1);
	if (!insn) {
		ret = -ENOMEM;
		goto end;
	}
	insn->op = op_type;
	ref_offset.offset = (uint16_t) -1U;
	memcpy(insn->data, &ref_offset, sizeof(ref_offset));
	/* reloc_offset points to struct load_op */
	reloc_offset_u32 = bytecode_get_len(&ctx->bytecode->b);
	if (reloc_offset_u32 > LTTNG_FILTER_MAX_LEN - 1) {
		ret = -EINVAL;
		goto end;
	}
	reloc_offset = (uint16_t) reloc_offset_u32;
	ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
	if (ret) {
		goto end;
	}
	/* append reloc */
	ret = bytecode_push(&ctx->bytecode_reloc, &reloc_offset,
			1, sizeof(reloc_offset));
	if (ret) {
		goto end;
	}
	ret = bytecode_push(&ctx->bytecode_reloc, symbol,
			1, strlen(symbol) + 1);
	if (ret) {
		goto end;
	}
	ret = 1; /* legacy */
end:
	free(insn);
	free(symbol);
	return ret;
}

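/*
 * Generate instructions for a load expression: try the legacy
 * single-instruction encoding first, then fall back to the generic chain of
 * GET_*_ROOT / GET_SYMBOL / GET_INDEX_U64 / LOAD_FIELD instructions.
 */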
static
int visit_node_load_expression(struct filter_parser_ctx *ctx,
		const struct ir_op *node)
{
	struct ir_load_expression *exp;
	struct ir_load_expression_op *op;
	int ret;

	exp = node->u.load.u.expression;
	if (!exp) {
		return -EINVAL;
	}
	op = exp->child;
	if (!op) {
		return -EINVAL;
	}

	/*
	 * TODO: if we remove legacy load for application contexts, we
	 * need to update session bytecode parser as well.
	 */
	ret = visit_node_load_expression_legacy(ctx, exp, op);
	if (ret < 0) {
		return ret;
	}
	if (ret > 0) {
		return 0; /* legacy */
	}

	for (; op != NULL; op = op->next) {
		switch (op->type) {
		case IR_LOAD_EXPRESSION_GET_CONTEXT_ROOT:
		{
			struct load_op *insn;
			uint32_t insn_len = sizeof(struct load_op);
			int ret;

			insn = calloc(insn_len, 1);
			if (!insn)
				return -ENOMEM;
			insn->op = FILTER_OP_GET_CONTEXT_ROOT;
			ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
			free(insn);
			if (ret) {
				return ret;
			}
			break;
		}
		case IR_LOAD_EXPRESSION_GET_APP_CONTEXT_ROOT:
		{
			struct load_op *insn;
			uint32_t insn_len = sizeof(struct load_op);
			int ret;

			insn = calloc(insn_len, 1);
			if (!insn)
				return -ENOMEM;
			insn->op = FILTER_OP_GET_APP_CONTEXT_ROOT;
			ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
			free(insn);
			if (ret) {
				return ret;
			}
			break;
		}
		case IR_LOAD_EXPRESSION_GET_PAYLOAD_ROOT:
		{
			struct load_op *insn;
			uint32_t insn_len = sizeof(struct load_op);
			int ret;

			insn = calloc(insn_len, 1);
			if (!insn)
				return -ENOMEM;
			insn->op = FILTER_OP_GET_PAYLOAD_ROOT;
			ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
			free(insn);
			if (ret) {
				return ret;
			}
			break;
		}
		case IR_LOAD_EXPRESSION_GET_SYMBOL:
		{
			struct load_op *insn;
			uint32_t insn_len = sizeof(struct load_op)
					+ sizeof(struct get_symbol);
			struct get_symbol symbol_offset;
			uint32_t reloc_offset_u32;
			uint16_t reloc_offset;
			uint32_t bytecode_reloc_offset_u32;
			int ret;

			insn = calloc(insn_len, 1);
			if (!insn)
				return -ENOMEM;
			insn->op = FILTER_OP_GET_SYMBOL;
			bytecode_reloc_offset_u32 =
					bytecode_get_len(&ctx->bytecode_reloc->b)
					+ sizeof(reloc_offset);
			symbol_offset.offset =
					(uint16_t) bytecode_reloc_offset_u32;
			memcpy(insn->data, &symbol_offset,
					sizeof(symbol_offset));
			/* reloc_offset points to struct load_op */
			reloc_offset_u32 = bytecode_get_len(&ctx->bytecode->b);
			if (reloc_offset_u32 > LTTNG_FILTER_MAX_LEN - 1) {
				free(insn);
				return -EINVAL;
			}
			reloc_offset = (uint16_t) reloc_offset_u32;
			ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
			if (ret) {
				free(insn);
				return ret;
			}
			/* append reloc */
			ret = bytecode_push(&ctx->bytecode_reloc, &reloc_offset,
					1, sizeof(reloc_offset));
			if (ret) {
				free(insn);
				return ret;
			}
			ret = bytecode_push(&ctx->bytecode_reloc,
					op->u.symbol,
					1, strlen(op->u.symbol) + 1);
			free(insn);
			if (ret) {
				return ret;
			}
			break;
		}
		case IR_LOAD_EXPRESSION_GET_INDEX:
		{
			struct load_op *insn;
			uint32_t insn_len = sizeof(struct load_op)
					+ sizeof(struct get_index_u64);
			struct get_index_u64 index;
			int ret;

			insn = calloc(insn_len, 1);
			if (!insn)
				return -ENOMEM;
			insn->op = FILTER_OP_GET_INDEX_U64;
			index.index = op->u.index;
			memcpy(insn->data, &index, sizeof(index));
			ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
			free(insn);
			if (ret) {
				return ret;
			}
			break;
		}
		case IR_LOAD_EXPRESSION_LOAD_FIELD:
		{
			struct load_op *insn;
			uint32_t insn_len = sizeof(struct load_op);
			int ret;

			insn = calloc(insn_len, 1);
			if (!insn)
				return -ENOMEM;
			insn->op = FILTER_OP_LOAD_FIELD;
			ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
			free(insn);
			if (ret) {
				return ret;
			}
			break;
		}
		}
	}
	return 0;
}

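/*
 * Load a literal operand: a string (plain or star-glob), a signed 64-bit
 * integer, a double, or a nested load expression.
 */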
static
int visit_node_load(struct filter_parser_ctx *ctx, struct ir_op *node)
{
	int ret;

	switch (node->data_type) {
	case IR_DATA_UNKNOWN:
	default:
		fprintf(stderr, "[error] Unknown data type in %s\n",
				__func__);
		return -EINVAL;

	case IR_DATA_STRING:
	{
		struct load_op *insn;
		uint32_t insn_len = sizeof(struct load_op)
				+ strlen(node->u.load.u.string.value) + 1;

		insn = calloc(insn_len, 1);
		if (!insn)
			return -ENOMEM;

		switch (node->u.load.u.string.type) {
		case IR_LOAD_STRING_TYPE_GLOB_STAR:
			/*
			 * We explicitly tell the interpreter here that
			 * this load is a full star globbing pattern so
			 * that the appropriate matching function can be
			 * called. Also, see comment below.
			 */
			insn->op = FILTER_OP_LOAD_STAR_GLOB_STRING;
			break;
		default:
			/*
			 * This is the "legacy" string, which includes
			 * star globbing patterns with a star only at
			 * the end. Both "plain" and "star at the end"
			 * literal strings are handled at the same place
			 * by the tracer's filter bytecode interpreter,
			 * whereas full star globbing patterns (stars
			 * can be anywhere in the string) are a special
			 * case.
			 */
			insn->op = FILTER_OP_LOAD_STRING;
			break;
		}

		strcpy(insn->data, node->u.load.u.string.value);
		ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
		free(insn);
		return ret;
	}
	case IR_DATA_NUMERIC:
	{
		struct load_op *insn;
		uint32_t insn_len = sizeof(struct load_op)
				+ sizeof(struct literal_numeric);

		insn = calloc(insn_len, 1);
		if (!insn)
			return -ENOMEM;
		insn->op = FILTER_OP_LOAD_S64;
		memcpy(insn->data, &node->u.load.u.num, sizeof(int64_t));
		ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
		free(insn);
		return ret;
	}
	case IR_DATA_FLOAT:
	{
		struct load_op *insn;
		uint32_t insn_len = sizeof(struct load_op)
				+ sizeof(struct literal_double);

		insn = calloc(insn_len, 1);
		if (!insn)
			return -ENOMEM;
		insn->op = FILTER_OP_LOAD_DOUBLE;
		memcpy(insn->data, &node->u.load.u.flt, sizeof(double));
		ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
		free(insn);
		return ret;
	}
	case IR_DATA_EXPRESSION:
		return visit_node_load_expression(ctx, node);
	}
}

static
int visit_node_unary(struct filter_parser_ctx *ctx, struct ir_op *node)
{
	int ret;
	struct unary_op insn;

	/* Visit child */
	ret = recursive_visit_gen_bytecode(ctx, node->u.unary.child);
	if (ret)
		return ret;

	/* Generate the unary bytecode instruction */
	switch (node->u.unary.type) {
	case AST_UNARY_UNKNOWN:
	default:
		fprintf(stderr, "[error] Unknown unary node type in %s\n",
				__func__);
		return -EINVAL;
	case AST_UNARY_PLUS:
		/* Nothing to do. */
		return 0;
	case AST_UNARY_MINUS:
		insn.op = FILTER_OP_UNARY_MINUS;
		return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
	case AST_UNARY_NOT:
		insn.op = FILTER_OP_UNARY_NOT;
		return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
	case AST_UNARY_BIT_NOT:
		insn.op = FILTER_OP_UNARY_BIT_NOT;
		return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
	}
}

/*
 * Binary comparator nesting is disallowed. This allows fitting into
 * only 2 registers.
 */
static
int visit_node_binary(struct filter_parser_ctx *ctx, struct ir_op *node)
{
	int ret;
	struct binary_op insn;

	/* Visit children */
	ret = recursive_visit_gen_bytecode(ctx, node->u.binary.left);
	if (ret)
		return ret;
	ret = recursive_visit_gen_bytecode(ctx, node->u.binary.right);
	if (ret)
		return ret;

	switch (node->u.binary.type) {
	case AST_OP_UNKNOWN:
	default:
		fprintf(stderr, "[error] Unknown binary node type in %s\n",
				__func__);
		return -EINVAL;

	case AST_OP_AND:
	case AST_OP_OR:
		fprintf(stderr, "[error] Unexpected logical node type in %s\n",
				__func__);
		return -EINVAL;

	case AST_OP_MUL:
		insn.op = FILTER_OP_MUL;
		break;
	case AST_OP_DIV:
		insn.op = FILTER_OP_DIV;
		break;
	case AST_OP_MOD:
		insn.op = FILTER_OP_MOD;
		break;
	case AST_OP_PLUS:
		insn.op = FILTER_OP_PLUS;
		break;
	case AST_OP_MINUS:
		insn.op = FILTER_OP_MINUS;
		break;
	case AST_OP_BIT_RSHIFT:
		insn.op = FILTER_OP_BIT_RSHIFT;
		break;
	case AST_OP_BIT_LSHIFT:
		insn.op = FILTER_OP_BIT_LSHIFT;
		break;
	case AST_OP_BIT_AND:
		insn.op = FILTER_OP_BIT_AND;
		break;
	case AST_OP_BIT_OR:
		insn.op = FILTER_OP_BIT_OR;
		break;
	case AST_OP_BIT_XOR:
		insn.op = FILTER_OP_BIT_XOR;
		break;

	case AST_OP_EQ:
		insn.op = FILTER_OP_EQ;
		break;
	case AST_OP_NE:
		insn.op = FILTER_OP_NE;
		break;
	case AST_OP_GT:
		insn.op = FILTER_OP_GT;
		break;
	case AST_OP_LT:
		insn.op = FILTER_OP_LT;
		break;
	case AST_OP_GE:
		insn.op = FILTER_OP_GE;
		break;
	case AST_OP_LE:
		insn.op = FILTER_OP_LE;
		break;
	}
	return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
}

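/*
 * The AND/OR instruction is emitted with a placeholder skip_offset which is
 * backpatched, once the right-hand operand has been generated, to point just
 * past it; this lets the interpreter short-circuit the evaluation.
 */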
/*
 * A logical op always returns a s64 (1 or 0).
 */
static
int visit_node_logical(struct filter_parser_ctx *ctx, struct ir_op *node)
{
	int ret;
	struct logical_op insn;
	uint16_t skip_offset_loc;
	uint16_t target_loc;

	/* Visit left child */
	ret = recursive_visit_gen_bytecode(ctx, node->u.binary.left);
	if (ret)
		return ret;
	/* Cast to s64 if float or field ref */
	if ((node->u.binary.left->data_type == IR_DATA_FIELD_REF
			|| node->u.binary.left->data_type == IR_DATA_GET_CONTEXT_REF
			|| node->u.binary.left->data_type == IR_DATA_EXPRESSION)
			|| node->u.binary.left->data_type == IR_DATA_FLOAT) {
		struct cast_op cast_insn;

		if (node->u.binary.left->data_type == IR_DATA_FIELD_REF
				|| node->u.binary.left->data_type == IR_DATA_GET_CONTEXT_REF
				|| node->u.binary.left->data_type == IR_DATA_EXPRESSION) {
			cast_insn.op = FILTER_OP_CAST_TO_S64;
		} else {
			cast_insn.op = FILTER_OP_CAST_DOUBLE_TO_S64;
		}
		ret = bytecode_push(&ctx->bytecode, &cast_insn,
				1, sizeof(cast_insn));
		if (ret)
			return ret;
	}
	switch (node->u.logical.type) {
	default:
		fprintf(stderr, "[error] Unknown node type in %s\n",
				__func__);
		return -EINVAL;

	case AST_OP_AND:
		insn.op = FILTER_OP_AND;
		break;
	case AST_OP_OR:
		insn.op = FILTER_OP_OR;
		break;
	}
	insn.skip_offset = (uint16_t) -1UL;	/* Temporary */
	ret = bytecode_push_logical(&ctx->bytecode, &insn, 1, sizeof(insn),
			&skip_offset_loc);
	if (ret)
		return ret;
	/* Visit right child */
	ret = recursive_visit_gen_bytecode(ctx, node->u.binary.right);
	if (ret)
		return ret;
	/* Cast to s64 if float or field ref */
	if ((node->u.binary.right->data_type == IR_DATA_FIELD_REF
			|| node->u.binary.right->data_type == IR_DATA_GET_CONTEXT_REF
			|| node->u.binary.right->data_type == IR_DATA_EXPRESSION)
			|| node->u.binary.right->data_type == IR_DATA_FLOAT) {
		struct cast_op cast_insn;

		if (node->u.binary.right->data_type == IR_DATA_FIELD_REF
				|| node->u.binary.right->data_type == IR_DATA_GET_CONTEXT_REF
				|| node->u.binary.right->data_type == IR_DATA_EXPRESSION) {
			cast_insn.op = FILTER_OP_CAST_TO_S64;
		} else {
			cast_insn.op = FILTER_OP_CAST_DOUBLE_TO_S64;
		}
		ret = bytecode_push(&ctx->bytecode, &cast_insn,
				1, sizeof(cast_insn));
		if (ret)
			return ret;
	}
	/* We now know where the logical op can skip. */
	target_loc = (uint16_t) bytecode_get_len(&ctx->bytecode->b);
	ret = bytecode_patch(&ctx->bytecode,
			&target_loc,		/* Offset to jump to */
			skip_offset_loc,	/* Where to patch */
			sizeof(uint16_t));
	return ret;
}

/*
 * Postorder traversal of the tree. We need the children's results before
 * we can evaluate the parent.
 */
static
int recursive_visit_gen_bytecode(struct filter_parser_ctx *ctx,
		struct ir_op *node)
{
	switch (node->op) {
	case IR_OP_UNKNOWN:
	default:
		fprintf(stderr, "[error] Unknown node type in %s\n",
				__func__);
		return -EINVAL;

	case IR_OP_ROOT:
		return visit_node_root(ctx, node);
	case IR_OP_LOAD:
		return visit_node_load(ctx, node);
	case IR_OP_UNARY:
		return visit_node_unary(ctx, node);
	case IR_OP_BINARY:
		return visit_node_binary(ctx, node);
	case IR_OP_LOGICAL:
		return visit_node_logical(ctx, node);
	}
}

LTTNG_HIDDEN
void filter_bytecode_free(struct filter_parser_ctx *ctx)
{
	if (!ctx) {
		return;
	}

	if (ctx->bytecode) {
		free(ctx->bytecode);
		ctx->bytecode = NULL;
	}

	if (ctx->bytecode_reloc) {
		free(ctx->bytecode_reloc);
		ctx->bytecode_reloc = NULL;
	}
}

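/*
 * Entry point: generate the filter bytecode from the IR tree rooted at
 * ctx->ir_root, then append the relocation table (instruction offsets and
 * symbol names) after the instructions and record where it starts in
 * reloc_table_offset.
 */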
LTTNG_HIDDEN
int filter_visitor_bytecode_generate(struct filter_parser_ctx *ctx)
{
	int ret;

	ret = bytecode_init(&ctx->bytecode);
	if (ret)
		return ret;
	ret = bytecode_init(&ctx->bytecode_reloc);
	if (ret)
		goto error;
	ret = recursive_visit_gen_bytecode(ctx, ctx->ir_root);
	if (ret)
		goto error;

	/* Finally, append symbol table to bytecode */
	ctx->bytecode->b.reloc_table_offset = bytecode_get_len(&ctx->bytecode->b);
	return bytecode_push(&ctx->bytecode, ctx->bytecode_reloc->b.data,
			1, bytecode_get_len(&ctx->bytecode_reloc->b));

error:
	filter_bytecode_free(ctx);
	return ret;
}