/*
 * filter-visitor-generate-bytecode.cpp
 *
 * LTTng filter bytecode generation
 *
 * Copyright 2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * SPDX-License-Identifier: LGPL-2.1-only
 *
 */

#include <stdlib.h>
#include <string.h>

#include "common/align.hpp"
#include "common/bytecode/bytecode.hpp"
#include "common/compat/errno.hpp"
#include "common/compat/string.hpp"
#include "common/macros.hpp"
#include "common/string-utils/string-utils.hpp"
#include "filter-ast.hpp"
#include "filter-ir.hpp"

#ifndef max_t
#define max_t(type, a, b) ((type) ((a) > (b) ? (a) : (b)))
#endif

static
int recursive_visit_gen_bytecode(struct filter_parser_ctx *ctx,
		struct ir_op *node);

static
int bytecode_patch(struct lttng_bytecode_alloc **fb,
		const void *data,
		uint16_t offset,
		uint32_t len)
{
	if (offset >= (*fb)->b.len) {
		return -EINVAL;
	}
	memcpy(&(*fb)->b.data[offset], data, len);
	return 0;
}
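
/*
 * Illustrative sketch (not part of the generated stream): bytecode_patch()
 * is used to backpatch a placeholder operand once its final value is
 * known. visit_node_logical() below pushes a logical_op whose skip_offset
 * is temporarily (uint16_t) -1UL, then patches it after the right-hand
 * side has been emitted:
 *
 *   [ logical_op AND/OR, skip_offset = ???? ]  <- skip_offset_loc
 *   [ ... right-hand side instructions ...  ]
 *   [ next instruction                      ]  <- target_loc, patched in
 */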

static
int visit_node_root(struct filter_parser_ctx *ctx, struct ir_op *node)
{
	int ret;
	struct return_op insn;

	/* Visit child */
	ret = recursive_visit_gen_bytecode(ctx, node->u.root.child);
	if (ret)
		return ret;

	/* Generate end of bytecode instruction */
	insn.op = BYTECODE_OP_RETURN;
	return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
}

/*
 * 1: match
 * 0: no match
 * < 0: error
 */
static
int load_expression_legacy_match(const struct ir_load_expression *exp,
		enum bytecode_op *op_type,
		char **symbol)
{
	const struct ir_load_expression_op *op;
	bool need_dot = false;

	op = exp->child;
	switch (op->type) {
	case IR_LOAD_EXPRESSION_GET_CONTEXT_ROOT:
		*op_type = BYTECODE_OP_GET_CONTEXT_REF;
		if (strutils_append_str(symbol, "$ctx.")) {
			return -ENOMEM;
		}
		need_dot = false;
		break;
	case IR_LOAD_EXPRESSION_GET_APP_CONTEXT_ROOT:
		*op_type = BYTECODE_OP_GET_CONTEXT_REF;
		if (strutils_append_str(symbol, "$app.")) {
			return -ENOMEM;
		}
		need_dot = false;
		break;
	case IR_LOAD_EXPRESSION_GET_PAYLOAD_ROOT:
		*op_type = BYTECODE_OP_LOAD_FIELD_REF;
		need_dot = false;
		break;

	case IR_LOAD_EXPRESSION_GET_SYMBOL:
	case IR_LOAD_EXPRESSION_GET_INDEX:
	case IR_LOAD_EXPRESSION_LOAD_FIELD:
	default:
		return 0; /* no match */
	}

	for (;;) {
		op = op->next;
		if (!op) {
			return 0; /* no match */
		}
		switch (op->type) {
		case IR_LOAD_EXPRESSION_LOAD_FIELD:
			goto end;
		case IR_LOAD_EXPRESSION_GET_SYMBOL:
			if (need_dot && strutils_append_str(symbol, ".")) {
				return -ENOMEM;
			}
			if (strutils_append_str(symbol, op->u.symbol)) {
				return -ENOMEM;
			}
			break;
		default:
			return 0; /* no match */
		}
		need_dot = true;
	}
end:
	return 1; /* Legacy match */
}
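
/*
 * Example (illustrative): for a filter such as '$ctx.procname == "bash"',
 * the left-hand load matches the legacy form above and yields
 * *op_type = BYTECODE_OP_GET_CONTEXT_REF with *symbol = "$ctx.procname".
 * A plain payload field like "msg" yields BYTECODE_OP_LOAD_FIELD_REF with
 * *symbol = "msg". Anything using indexing (e.g. "a[0]") falls through to
 * the default case, returns 0, and takes the non-legacy path.
 */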

/*
 * 1: legacy match
 * 0: no legacy match
 * < 0: error
 */
static
int visit_node_load_expression_legacy(struct filter_parser_ctx *ctx,
		const struct ir_load_expression *exp,
		const struct ir_load_expression_op *op)
{
	struct load_op *insn = NULL;
	uint32_t insn_len = sizeof(struct load_op)
			+ sizeof(struct field_ref);
	struct field_ref ref_offset;
	uint32_t reloc_offset_u32;
	uint16_t reloc_offset;
	enum bytecode_op op_type;
	char *symbol = NULL;
	int ret;

	ret = load_expression_legacy_match(exp, &op_type, &symbol);
	if (ret <= 0) {
		goto end;
	}
	insn = (load_op *) calloc(insn_len, 1);
	if (!insn) {
		ret = -ENOMEM;
		goto end;
	}
	insn->op = op_type;
	ref_offset.offset = (uint16_t) -1U;
	memcpy(insn->data, &ref_offset, sizeof(ref_offset));
	/* reloc_offset points to struct load_op */
	reloc_offset_u32 = bytecode_get_len(&ctx->bytecode->b);
	if (reloc_offset_u32 > LTTNG_FILTER_MAX_LEN - 1) {
		ret = -EINVAL;
		goto end;
	}
	reloc_offset = (uint16_t) reloc_offset_u32;
	ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
	if (ret) {
		goto end;
	}
	/* append reloc */
	ret = bytecode_push(&ctx->bytecode_reloc, &reloc_offset,
			1, sizeof(reloc_offset));
	if (ret) {
		goto end;
	}
	ret = bytecode_push(&ctx->bytecode_reloc, symbol,
			1, strlen(symbol) + 1);
	if (ret) {
		goto end;
	}
	ret = 1; /* legacy */
end:
	free(insn);
	free(symbol);
	return ret;
}
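
/*
 * Resulting layout for a legacy load (illustrative; the consumer of the
 * relocation table lives outside this file):
 *
 *   ctx->bytecode:       [ load_op op_type | field_ref offset = 0xffff ]
 *   ctx->bytecode_reloc: [ uint16_t reloc_offset ][ symbol "...\0" ]
 *
 * reloc_offset is the offset of the load_op within the bytecode; the
 * 0xffff field_ref offset is a placeholder, to be fixed up when the
 * bytecode is later linked against the actual field layout.
 */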

static
int visit_node_load_expression(struct filter_parser_ctx *ctx,
		const struct ir_op *node)
{
	struct ir_load_expression *exp;
	struct ir_load_expression_op *op;
	int ret;

	exp = node->u.load.u.expression;
	if (!exp) {
		return -EINVAL;
	}
	op = exp->child;
	if (!op) {
		return -EINVAL;
	}

	/*
	 * TODO: if we remove legacy load for application contexts, we
	 * need to update session bytecode parser as well.
	 */
	ret = visit_node_load_expression_legacy(ctx, exp, op);
	if (ret < 0) {
		return ret;
	}
	if (ret > 0) {
		return 0; /* legacy */
	}

	for (; op != NULL; op = op->next) {
		switch (op->type) {
		case IR_LOAD_EXPRESSION_GET_CONTEXT_ROOT:
		{
			ret = bytecode_push_get_context_root(&ctx->bytecode);
			if (ret) {
				return ret;
			}
			break;
		}
		case IR_LOAD_EXPRESSION_GET_APP_CONTEXT_ROOT:
		{
			ret = bytecode_push_get_app_context_root(
					&ctx->bytecode);
			if (ret) {
				return ret;
			}
			break;
		}
		case IR_LOAD_EXPRESSION_GET_PAYLOAD_ROOT:
		{
			ret = bytecode_push_get_payload_root(&ctx->bytecode);
			if (ret) {
				return ret;
			}
			break;
		}
		case IR_LOAD_EXPRESSION_GET_SYMBOL:
		{
			ret = bytecode_push_get_symbol(&ctx->bytecode,
					&ctx->bytecode_reloc, op->u.symbol);
			if (ret) {
				return ret;
			}
			break;
		}
		case IR_LOAD_EXPRESSION_GET_INDEX:
		{
			ret = bytecode_push_get_index_u64(
					&ctx->bytecode, op->u.index);
			if (ret) {
				return ret;
			}
			break;
		}
		case IR_LOAD_EXPRESSION_LOAD_FIELD:
		{
			struct load_op *insn;
			uint32_t insn_len = sizeof(struct load_op);

			insn = (load_op *) calloc(insn_len, 1);
			if (!insn)
				return -ENOMEM;
			insn->op = BYTECODE_OP_LOAD_FIELD;
			ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
			free(insn);
			if (ret) {
				return ret;
			}
			break;
		}
		}
	}
	return 0;
}
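
/*
 * Example (illustrative, assuming the IR chain ends with a LOAD_FIELD
 * op): the payload expression "a.b[1]" lowers to
 *
 *   BYTECODE_OP_GET_PAYLOAD_ROOT
 *   BYTECODE_OP_GET_SYMBOL "a"    (symbol bytes go through bytecode_reloc)
 *   BYTECODE_OP_GET_SYMBOL "b"
 *   BYTECODE_OP_GET_INDEX_U64 1
 *   BYTECODE_OP_LOAD_FIELD
 */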

static
int visit_node_load(struct filter_parser_ctx *ctx, struct ir_op *node)
{
	int ret;

	switch (node->data_type) {
	case IR_DATA_UNKNOWN:
	default:
		fprintf(stderr, "[error] Unknown data type in %s\n",
				__func__);
		return -EINVAL;

	case IR_DATA_STRING:
	{
		struct load_op *insn;
		uint32_t insn_len = sizeof(struct load_op)
				+ strlen(node->u.load.u.string.value) + 1;

		insn = (load_op *) calloc(insn_len, 1);
		if (!insn)
			return -ENOMEM;

		switch (node->u.load.u.string.type) {
		case IR_LOAD_STRING_TYPE_GLOB_STAR:
			/*
			 * We explicitly tell the interpreter here that
			 * this load is a full star globbing pattern so
			 * that the appropriate matching function can be
			 * called. Also, see comment below.
			 */
			insn->op = BYTECODE_OP_LOAD_STAR_GLOB_STRING;
			break;
		default:
			/*
			 * This is the "legacy" string, which includes
			 * star globbing patterns with a star only at
			 * the end. Both "plain" and "star at the end"
			 * literal strings are handled at the same place
			 * by the tracer's filter bytecode interpreter,
			 * whereas full star globbing patterns (stars
			 * can be anywhere in the string) are a special
			 * case.
			 */
			insn->op = BYTECODE_OP_LOAD_STRING;
			break;
		}

		strcpy(insn->data, node->u.load.u.string.value);
		ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
		free(insn);
		return ret;
	}
	case IR_DATA_NUMERIC:
	{
		struct load_op *insn;
		uint32_t insn_len = sizeof(struct load_op)
				+ sizeof(struct literal_numeric);

		insn = (load_op *) calloc(insn_len, 1);
		if (!insn)
			return -ENOMEM;
		insn->op = BYTECODE_OP_LOAD_S64;
		memcpy(insn->data, &node->u.load.u.num, sizeof(int64_t));
		ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
		free(insn);
		return ret;
	}
	case IR_DATA_FLOAT:
	{
		struct load_op *insn;
		uint32_t insn_len = sizeof(struct load_op)
				+ sizeof(struct literal_double);

		insn = (load_op *) calloc(insn_len, 1);
		if (!insn)
			return -ENOMEM;
		insn->op = BYTECODE_OP_LOAD_DOUBLE;
		memcpy(insn->data, &node->u.load.u.flt, sizeof(double));
		ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
		free(insn);
		return ret;
	}
	case IR_DATA_EXPRESSION:
		return visit_node_load_expression(ctx, node);
	}
}
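
/*
 * Example (illustrative): the literal "foo*" (star only at the end) is
 * pushed as BYTECODE_OP_LOAD_STRING and handled by the legacy string
 * matcher, while "f*o" (star anywhere) is pushed as
 * BYTECODE_OP_LOAD_STAR_GLOB_STRING so that the interpreter selects the
 * full star-globbing comparison instead.
 */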

static
int visit_node_unary(struct filter_parser_ctx *ctx, struct ir_op *node)
{
	int ret;
	struct unary_op insn;

	/* Visit child */
	ret = recursive_visit_gen_bytecode(ctx, node->u.unary.child);
	if (ret)
		return ret;

	/* Generate unary bytecode instruction */
	switch (node->u.unary.type) {
	case AST_UNARY_UNKNOWN:
	default:
		fprintf(stderr, "[error] Unknown unary node type in %s\n",
				__func__);
		return -EINVAL;
	case AST_UNARY_PLUS:
		/* Nothing to do. */
		return 0;
	case AST_UNARY_MINUS:
		insn.op = BYTECODE_OP_UNARY_MINUS;
		return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
	case AST_UNARY_NOT:
		insn.op = BYTECODE_OP_UNARY_NOT;
		return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
	case AST_UNARY_BIT_NOT:
		insn.op = BYTECODE_OP_UNARY_BIT_NOT;
		return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
	}
}
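
/*
 * Example (illustrative): "!a" emits the load of "a" followed by a single
 * BYTECODE_OP_UNARY_NOT; a unary "+" emits nothing at all.
 */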

/*
 * Binary comparator nesting is disallowed. This allows fitting into
 * only 2 registers.
 */
static
int visit_node_binary(struct filter_parser_ctx *ctx, struct ir_op *node)
{
	int ret;
	struct binary_op insn;

	/* Visit children */
	ret = recursive_visit_gen_bytecode(ctx, node->u.binary.left);
	if (ret)
		return ret;
	ret = recursive_visit_gen_bytecode(ctx, node->u.binary.right);
	if (ret)
		return ret;

	switch (node->u.binary.type) {
	case AST_OP_UNKNOWN:
	default:
		fprintf(stderr, "[error] Unknown binary node type in %s\n",
				__func__);
		return -EINVAL;

	case AST_OP_AND:
	case AST_OP_OR:
		fprintf(stderr, "[error] Unexpected logical node type in %s\n",
				__func__);
		return -EINVAL;

	case AST_OP_MUL:
		insn.op = BYTECODE_OP_MUL;
		break;
	case AST_OP_DIV:
		insn.op = BYTECODE_OP_DIV;
		break;
	case AST_OP_MOD:
		insn.op = BYTECODE_OP_MOD;
		break;
	case AST_OP_PLUS:
		insn.op = BYTECODE_OP_PLUS;
		break;
	case AST_OP_MINUS:
		insn.op = BYTECODE_OP_MINUS;
		break;
	case AST_OP_BIT_RSHIFT:
		insn.op = BYTECODE_OP_BIT_RSHIFT;
		break;
	case AST_OP_BIT_LSHIFT:
		insn.op = BYTECODE_OP_BIT_LSHIFT;
		break;
	case AST_OP_BIT_AND:
		insn.op = BYTECODE_OP_BIT_AND;
		break;
	case AST_OP_BIT_OR:
		insn.op = BYTECODE_OP_BIT_OR;
		break;
	case AST_OP_BIT_XOR:
		insn.op = BYTECODE_OP_BIT_XOR;
		break;

	case AST_OP_EQ:
		insn.op = BYTECODE_OP_EQ;
		break;
	case AST_OP_NE:
		insn.op = BYTECODE_OP_NE;
		break;
	case AST_OP_GT:
		insn.op = BYTECODE_OP_GT;
		break;
	case AST_OP_LT:
		insn.op = BYTECODE_OP_LT;
		break;
	case AST_OP_GE:
		insn.op = BYTECODE_OP_GE;
		break;
	case AST_OP_LE:
		insn.op = BYTECODE_OP_LE;
		break;
	}
	return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
}
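
/*
 * Example (illustrative): "a == 42" is emitted in postorder: the load of
 * "a", then BYTECODE_OP_LOAD_S64 42, then one BYTECODE_OP_EQ consuming
 * both operands.
 */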

/*
 * A logical op always returns an s64 (1 or 0).
 */
static
int visit_node_logical(struct filter_parser_ctx *ctx, struct ir_op *node)
{
	int ret;
	struct logical_op insn;
	uint16_t skip_offset_loc;
	uint16_t target_loc;

	/* Visit left child */
	ret = recursive_visit_gen_bytecode(ctx, node->u.binary.left);
	if (ret)
		return ret;
	/* Cast to s64 if float or field ref */
	if ((node->u.binary.left->data_type == IR_DATA_FIELD_REF
			|| node->u.binary.left->data_type == IR_DATA_GET_CONTEXT_REF
			|| node->u.binary.left->data_type == IR_DATA_EXPRESSION)
			|| node->u.binary.left->data_type == IR_DATA_FLOAT) {
		struct cast_op cast_insn;

		if (node->u.binary.left->data_type == IR_DATA_FIELD_REF
				|| node->u.binary.left->data_type == IR_DATA_GET_CONTEXT_REF
				|| node->u.binary.left->data_type == IR_DATA_EXPRESSION) {
			cast_insn.op = BYTECODE_OP_CAST_TO_S64;
		} else {
			cast_insn.op = BYTECODE_OP_CAST_DOUBLE_TO_S64;
		}
		ret = bytecode_push(&ctx->bytecode, &cast_insn,
				1, sizeof(cast_insn));
		if (ret)
			return ret;
	}
	switch (node->u.logical.type) {
	default:
		fprintf(stderr, "[error] Unknown node type in %s\n",
				__func__);
		return -EINVAL;

	case AST_OP_AND:
		insn.op = BYTECODE_OP_AND;
		break;
	case AST_OP_OR:
		insn.op = BYTECODE_OP_OR;
		break;
	}
	insn.skip_offset = (uint16_t) -1UL; /* Temporary */
	ret = bytecode_push_logical(&ctx->bytecode, &insn, 1, sizeof(insn),
			&skip_offset_loc);
	if (ret)
		return ret;
	/* Visit right child */
	ret = recursive_visit_gen_bytecode(ctx, node->u.binary.right);
	if (ret)
		return ret;
	/* Cast to s64 if float or field ref */
	if ((node->u.binary.right->data_type == IR_DATA_FIELD_REF
			|| node->u.binary.right->data_type == IR_DATA_GET_CONTEXT_REF
			|| node->u.binary.right->data_type == IR_DATA_EXPRESSION)
			|| node->u.binary.right->data_type == IR_DATA_FLOAT) {
		struct cast_op cast_insn;

		if (node->u.binary.right->data_type == IR_DATA_FIELD_REF
				|| node->u.binary.right->data_type == IR_DATA_GET_CONTEXT_REF
				|| node->u.binary.right->data_type == IR_DATA_EXPRESSION) {
			cast_insn.op = BYTECODE_OP_CAST_TO_S64;
		} else {
			cast_insn.op = BYTECODE_OP_CAST_DOUBLE_TO_S64;
		}
		ret = bytecode_push(&ctx->bytecode, &cast_insn,
				1, sizeof(cast_insn));
		if (ret)
			return ret;
	}
	/* We now know where the logical op can skip. */
	target_loc = (uint16_t) bytecode_get_len(&ctx->bytecode->b);
	ret = bytecode_patch(&ctx->bytecode,
			&target_loc,		/* Offset to jump to */
			skip_offset_loc,	/* Where to patch */
			sizeof(uint16_t));
	return ret;
}
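
/*
 * Example (illustrative): "a && b" produces
 *
 *   [ load (+ cast) of a              ]
 *   [ logical_op AND, skip_offset = X ]
 *   [ load (+ cast) of b              ]
 *   [ next instruction                ]  <- X is patched to point here
 *
 * so the interpreter can skip evaluating "b" entirely when "a" alone
 * determines the result.
 */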

/*
 * Postorder traversal of the tree. We need the children's results before
 * we can evaluate the parent.
 */
static
int recursive_visit_gen_bytecode(struct filter_parser_ctx *ctx,
		struct ir_op *node)
{
	switch (node->op) {
	case IR_OP_UNKNOWN:
	default:
		fprintf(stderr, "[error] Unknown node type in %s\n",
				__func__);
		return -EINVAL;

	case IR_OP_ROOT:
		return visit_node_root(ctx, node);
	case IR_OP_LOAD:
		return visit_node_load(ctx, node);
	case IR_OP_UNARY:
		return visit_node_unary(ctx, node);
	case IR_OP_BINARY:
		return visit_node_binary(ctx, node);
	case IR_OP_LOGICAL:
		return visit_node_logical(ctx, node);
	}
}
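
/*
 * Example (illustrative): for "(a + 1) > 2" the visit order is: load a,
 * load 1, PLUS, load 2, GT; children first, then the parent's operator.
 */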

void filter_bytecode_free(struct filter_parser_ctx *ctx)
{
	if (!ctx) {
		return;
	}

	if (ctx->bytecode) {
		free(ctx->bytecode);
		ctx->bytecode = NULL;
	}

	if (ctx->bytecode_reloc) {
		free(ctx->bytecode_reloc);
		ctx->bytecode_reloc = NULL;
	}
}

int filter_visitor_bytecode_generate(struct filter_parser_ctx *ctx)
{
	int ret;

	ret = bytecode_init(&ctx->bytecode);
	if (ret)
		return ret;
	ret = bytecode_init(&ctx->bytecode_reloc);
	if (ret)
		goto error;
	ret = recursive_visit_gen_bytecode(ctx, ctx->ir_root);
	if (ret)
		goto error;

	/* Finally, append symbol table to bytecode */
	ctx->bytecode->b.reloc_table_offset = bytecode_get_len(&ctx->bytecode->b);
	return bytecode_push(&ctx->bytecode, ctx->bytecode_reloc->b.data,
			1, bytecode_get_len(&ctx->bytecode_reloc->b));

error:
	filter_bytecode_free(ctx);
	return ret;
}
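
/*
 * Final buffer layout (illustrative):
 *
 *   [ instructions ... BYTECODE_OP_RETURN ][ relocation table ]
 *                                          ^
 *                                          b.reloc_table_offset
 *
 * where the relocation table is the concatenation of
 * (uint16_t offset, symbol string) entries accumulated in
 * ctx->bytecode_reloc during generation.
 */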