common: move append_str to string-utils
[lttng-tools.git] / src / common / filter / filter-visitor-generate-bytecode.cpp
CommitLineData
953192ba
MD
1/*
2 * filter-visitor-generate-bytecode.c
3 *
4 * LTTng filter bytecode generation
5 *
ab5be9fa 6 * Copyright 2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
953192ba 7 *
ab5be9fa 8 * SPDX-License-Identifier: LGPL-2.1-only
953192ba 9 *
953192ba
MD
10 */
11
12#include <stdlib.h>
13#include <string.h>
46820c8b 14#include <common/align.h>
edf4b93e 15#include <common/compat/errno.h>
afc5df03 16#include <common/compat/string.h>
46820c8b 17
0ae3cfc6
SM
18#include "common/align.h"
19#include "common/bytecode/bytecode.h"
20#include "common/compat/string.h"
21#include "common/macros.h"
4ff75060 22#include "common/string-utils/string-utils.h"
953192ba 23#include "filter-ast.h"
0ae3cfc6 24#include "filter-ir.h"
a187da1a 25
953192ba
MD
26#ifndef max_t
27#define max_t(type, a, b) ((type) ((a) > (b) ? (a) : (b)))
28#endif
29
953192ba
MD
30static
31int recursive_visit_gen_bytecode(struct filter_parser_ctx *ctx,
32 struct ir_op *node);
33
953192ba 34static
2b00d462 35int bytecode_patch(struct lttng_bytecode_alloc **fb,
953192ba
MD
36 const void *data,
37 uint16_t offset,
38 uint32_t len)
39{
40 if (offset >= (*fb)->b.len) {
41 return -EINVAL;
42 }
43 memcpy(&(*fb)->b.data[offset], data, len);
44 return 0;
45}
46
47static
48int visit_node_root(struct filter_parser_ctx *ctx, struct ir_op *node)
49{
50 int ret;
51 struct return_op insn;
52
53 /* Visit child */
54 ret = recursive_visit_gen_bytecode(ctx, node->u.root.child);
55 if (ret)
56 return ret;
57
58 /* Generate end of bytecode instruction */
2b00d462 59 insn.op = BYTECODE_OP_RETURN;
953192ba
MD
60 return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
61}
62
016dbbb4
MD
63/*
64 * 1: match
65 * 0: no match
66 * < 0: error
67 */
68static
69int load_expression_legacy_match(const struct ir_load_expression *exp,
2b00d462 70 enum bytecode_op *op_type,
016dbbb4
MD
71 char **symbol)
72{
73 const struct ir_load_expression_op *op;
74 bool need_dot = false;
75
76 op = exp->child;
77 switch (op->type) {
78 case IR_LOAD_EXPRESSION_GET_CONTEXT_ROOT:
2b00d462 79 *op_type = BYTECODE_OP_GET_CONTEXT_REF;
4ff75060 80 if (strutils_append_str(symbol, "$ctx.")) {
016dbbb4
MD
81 return -ENOMEM;
82 }
83 need_dot = false;
84 break;
85 case IR_LOAD_EXPRESSION_GET_APP_CONTEXT_ROOT:
2b00d462 86 *op_type = BYTECODE_OP_GET_CONTEXT_REF;
4ff75060 87 if (strutils_append_str(symbol, "$app.")) {
016dbbb4
MD
88 return -ENOMEM;
89 }
90 need_dot = false;
91 break;
92 case IR_LOAD_EXPRESSION_GET_PAYLOAD_ROOT:
2b00d462 93 *op_type = BYTECODE_OP_LOAD_FIELD_REF;
016dbbb4
MD
94 need_dot = false;
95 break;
96
97 case IR_LOAD_EXPRESSION_GET_SYMBOL:
98 case IR_LOAD_EXPRESSION_GET_INDEX:
99 case IR_LOAD_EXPRESSION_LOAD_FIELD:
100 default:
101 return 0; /* no match */
102 }
103
104 for (;;) {
105 op = op->next;
106 if (!op) {
107 return 0; /* no match */
108 }
109 switch (op->type) {
110 case IR_LOAD_EXPRESSION_LOAD_FIELD:
111 goto end;
112 case IR_LOAD_EXPRESSION_GET_SYMBOL:
4ff75060 113 if (need_dot && strutils_append_str(symbol, ".")) {
016dbbb4
MD
114 return -ENOMEM;
115 }
4ff75060 116 if (strutils_append_str(symbol, op->u.symbol)) {
016dbbb4
MD
117 return -ENOMEM;
118 }
119 break;
120 default:
121 return 0; /* no match */
122 }
123 need_dot = true;
124 }
125end:
126 return 1; /* Legacy match */
127}
128
129/*
130 * 1: legacy match
131 * 0: no legacy match
132 * < 0: error
133 */
134static
135int visit_node_load_expression_legacy(struct filter_parser_ctx *ctx,
136 const struct ir_load_expression *exp,
137 const struct ir_load_expression_op *op)
138{
139 struct load_op *insn = NULL;
140 uint32_t insn_len = sizeof(struct load_op)
141 + sizeof(struct field_ref);
142 struct field_ref ref_offset;
143 uint32_t reloc_offset_u32;
144 uint16_t reloc_offset;
2b00d462 145 enum bytecode_op op_type;
016dbbb4
MD
146 char *symbol = NULL;
147 int ret;
148
149 ret = load_expression_legacy_match(exp, &op_type, &symbol);
150 if (ret <= 0) {
151 goto end;
152 }
348ddc5c 153 insn = (load_op *) calloc(insn_len, 1);
016dbbb4
MD
154 if (!insn) {
155 ret = -ENOMEM;
156 goto end;
157 }
158 insn->op = op_type;
159 ref_offset.offset = (uint16_t) -1U;
160 memcpy(insn->data, &ref_offset, sizeof(ref_offset));
161 /* reloc_offset points to struct load_op */
162 reloc_offset_u32 = bytecode_get_len(&ctx->bytecode->b);
163 if (reloc_offset_u32 > LTTNG_FILTER_MAX_LEN - 1) {
164 ret = -EINVAL;
165 goto end;
166 }
167 reloc_offset = (uint16_t) reloc_offset_u32;
168 ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
169 if (ret) {
170 goto end;
171 }
172 /* append reloc */
173 ret = bytecode_push(&ctx->bytecode_reloc, &reloc_offset,
174 1, sizeof(reloc_offset));
175 if (ret) {
176 goto end;
177 }
178 ret = bytecode_push(&ctx->bytecode_reloc, symbol,
179 1, strlen(symbol) + 1);
73e38068
JG
180 if (ret) {
181 goto end;
182 }
016dbbb4
MD
183 ret = 1; /* legacy */
184end:
185 free(insn);
186 free(symbol);
187 return ret;
188}
189
bff988fa
MD
190static
191int visit_node_load_expression(struct filter_parser_ctx *ctx,
192 const struct ir_op *node)
193{
194 struct ir_load_expression *exp;
195 struct ir_load_expression_op *op;
016dbbb4 196 int ret;
bff988fa
MD
197
198 exp = node->u.load.u.expression;
199 if (!exp) {
200 return -EINVAL;
201 }
202 op = exp->child;
203 if (!op) {
204 return -EINVAL;
205 }
016dbbb4 206
b4bc01f7
MD
207 /*
208 * TODO: if we remove legacy load for application contexts, we
209 * need to update session bytecode parser as well.
210 */
016dbbb4
MD
211 ret = visit_node_load_expression_legacy(ctx, exp, op);
212 if (ret < 0) {
213 return ret;
214 }
215 if (ret > 0) {
216 return 0; /* legacy */
217 }
218
bff988fa
MD
219 for (; op != NULL; op = op->next) {
220 switch (op->type) {
221 case IR_LOAD_EXPRESSION_GET_CONTEXT_ROOT:
222 {
9de37b2b 223 ret = bytecode_push_get_context_root(&ctx->bytecode);
bff988fa 224
bff988fa
MD
225 if (ret) {
226 return ret;
227 }
6afbab01 228
bff988fa
MD
229 break;
230 }
231 case IR_LOAD_EXPRESSION_GET_APP_CONTEXT_ROOT:
232 {
9de37b2b
SM
233 ret = bytecode_push_get_app_context_root(
234 &ctx->bytecode);
bff988fa 235
bff988fa
MD
236 if (ret) {
237 return ret;
238 }
6afbab01 239
bff988fa
MD
240 break;
241 }
242 case IR_LOAD_EXPRESSION_GET_PAYLOAD_ROOT:
243 {
9de37b2b 244 ret = bytecode_push_get_payload_root(&ctx->bytecode);
bff988fa 245
bff988fa
MD
246 if (ret) {
247 return ret;
248 }
6afbab01 249
bff988fa
MD
250 break;
251 }
252 case IR_LOAD_EXPRESSION_GET_SYMBOL:
253 {
9de37b2b
SM
254 ret = bytecode_push_get_symbol(&ctx->bytecode,
255 &ctx->bytecode_reloc, op->u.symbol);
bff988fa 256
bff988fa
MD
257 if (ret) {
258 return ret;
259 }
6afbab01 260
bff988fa
MD
261 break;
262 }
263 case IR_LOAD_EXPRESSION_GET_INDEX:
264 {
9de37b2b
SM
265 ret = bytecode_push_get_index_u64(
266 &ctx->bytecode, op->u.index);
bff988fa 267
bff988fa
MD
268 if (ret) {
269 return ret;
270 }
6afbab01 271
bff988fa
MD
272 break;
273 }
274 case IR_LOAD_EXPRESSION_LOAD_FIELD:
275 {
276 struct load_op *insn;
277 uint32_t insn_len = sizeof(struct load_op);
bff988fa 278
348ddc5c 279 insn = (load_op *) calloc(insn_len, 1);
bff988fa
MD
280 if (!insn)
281 return -ENOMEM;
2b00d462 282 insn->op = BYTECODE_OP_LOAD_FIELD;
bff988fa
MD
283 ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
284 free(insn);
285 if (ret) {
286 return ret;
287 }
288 break;
289 }
290 }
291 }
292 return 0;
293}
294
953192ba
MD
295static
296int visit_node_load(struct filter_parser_ctx *ctx, struct ir_op *node)
297{
298 int ret;
299
300 switch (node->data_type) {
301 case IR_DATA_UNKNOWN:
302 default:
303 fprintf(stderr, "[error] Unknown data type in %s\n",
304 __func__);
305 return -EINVAL;
306
307 case IR_DATA_STRING:
308 {
309 struct load_op *insn;
310 uint32_t insn_len = sizeof(struct load_op)
9f449915 311 + strlen(node->u.load.u.string.value) + 1;
953192ba 312
348ddc5c 313 insn = (load_op *) calloc(insn_len, 1);
953192ba
MD
314 if (!insn)
315 return -ENOMEM;
9f449915
PP
316
317 switch (node->u.load.u.string.type) {
318 case IR_LOAD_STRING_TYPE_GLOB_STAR:
319 /*
320 * We explicitly tell the interpreter here that
321 * this load is a full star globbing pattern so
322 * that the appropriate matching function can be
323 * called. Also, see comment below.
324 */
2b00d462 325 insn->op = BYTECODE_OP_LOAD_STAR_GLOB_STRING;
9f449915
PP
326 break;
327 default:
328 /*
329 * This is the "legacy" string, which includes
330 * star globbing patterns with a star only at
331 * the end. Both "plain" and "star at the end"
332 * literal strings are handled at the same place
333 * by the tracer's filter bytecode interpreter,
334 * whereas full star globbing patterns (stars
335 * can be anywhere in the string) is a special
336 * case.
337 */
2b00d462 338 insn->op = BYTECODE_OP_LOAD_STRING;
9f449915
PP
339 break;
340 }
341
342 strcpy(insn->data, node->u.load.u.string.value);
953192ba
MD
343 ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
344 free(insn);
345 return ret;
346 }
347 case IR_DATA_NUMERIC:
348 {
349 struct load_op *insn;
350 uint32_t insn_len = sizeof(struct load_op)
351 + sizeof(struct literal_numeric);
352
348ddc5c 353 insn = (load_op *) calloc(insn_len, 1);
953192ba
MD
354 if (!insn)
355 return -ENOMEM;
2b00d462 356 insn->op = BYTECODE_OP_LOAD_S64;
58d494e4 357 memcpy(insn->data, &node->u.load.u.num, sizeof(int64_t));
953192ba
MD
358 ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
359 free(insn);
360 return ret;
361 }
e90d8561
MD
362 case IR_DATA_FLOAT:
363 {
364 struct load_op *insn;
365 uint32_t insn_len = sizeof(struct load_op)
366 + sizeof(struct literal_double);
367
348ddc5c 368 insn = (load_op *) calloc(insn_len, 1);
e90d8561
MD
369 if (!insn)
370 return -ENOMEM;
2b00d462 371 insn->op = BYTECODE_OP_LOAD_DOUBLE;
58d494e4 372 memcpy(insn->data, &node->u.load.u.flt, sizeof(double));
e90d8561
MD
373 ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
374 free(insn);
375 return ret;
376 }
bff988fa
MD
377 case IR_DATA_EXPRESSION:
378 return visit_node_load_expression(ctx, node);
953192ba
MD
379 }
380}
381
382static
383int visit_node_unary(struct filter_parser_ctx *ctx, struct ir_op *node)
384{
385 int ret;
386 struct unary_op insn;
387
388 /* Visit child */
389 ret = recursive_visit_gen_bytecode(ctx, node->u.unary.child);
390 if (ret)
391 return ret;
392
393 /* Generate end of bytecode instruction */
394 switch (node->u.unary.type) {
395 case AST_UNARY_UNKNOWN:
396 default:
397 fprintf(stderr, "[error] Unknown unary node type in %s\n",
398 __func__);
399 return -EINVAL;
400 case AST_UNARY_PLUS:
401 /* Nothing to do. */
402 return 0;
403 case AST_UNARY_MINUS:
2b00d462 404 insn.op = BYTECODE_OP_UNARY_MINUS;
953192ba
MD
405 return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
406 case AST_UNARY_NOT:
2b00d462 407 insn.op = BYTECODE_OP_UNARY_NOT;
953192ba 408 return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
116d3c01 409 case AST_UNARY_BIT_NOT:
2b00d462 410 insn.op = BYTECODE_OP_UNARY_BIT_NOT;
116d3c01 411 return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
953192ba
MD
412 }
413}
414
415/*
416 * Binary comparator nesting is disallowed. This allows fitting into
417 * only 2 registers.
418 */
419static
420int visit_node_binary(struct filter_parser_ctx *ctx, struct ir_op *node)
421{
422 int ret;
423 struct binary_op insn;
424
425 /* Visit child */
426 ret = recursive_visit_gen_bytecode(ctx, node->u.binary.left);
427 if (ret)
428 return ret;
429 ret = recursive_visit_gen_bytecode(ctx, node->u.binary.right);
430 if (ret)
431 return ret;
432
433 switch (node->u.binary.type) {
434 case AST_OP_UNKNOWN:
435 default:
436 fprintf(stderr, "[error] Unknown unary node type in %s\n",
437 __func__);
438 return -EINVAL;
439
440 case AST_OP_AND:
441 case AST_OP_OR:
442 fprintf(stderr, "[error] Unexpected logical node type in %s\n",
443 __func__);
444 return -EINVAL;
445
446 case AST_OP_MUL:
2b00d462 447 insn.op = BYTECODE_OP_MUL;
953192ba
MD
448 break;
449 case AST_OP_DIV:
2b00d462 450 insn.op = BYTECODE_OP_DIV;
953192ba
MD
451 break;
452 case AST_OP_MOD:
2b00d462 453 insn.op = BYTECODE_OP_MOD;
953192ba
MD
454 break;
455 case AST_OP_PLUS:
2b00d462 456 insn.op = BYTECODE_OP_PLUS;
953192ba
MD
457 break;
458 case AST_OP_MINUS:
2b00d462 459 insn.op = BYTECODE_OP_MINUS;
953192ba 460 break;
116d3c01 461 case AST_OP_BIT_RSHIFT:
2b00d462 462 insn.op = BYTECODE_OP_BIT_RSHIFT;
953192ba 463 break;
116d3c01 464 case AST_OP_BIT_LSHIFT:
2b00d462 465 insn.op = BYTECODE_OP_BIT_LSHIFT;
953192ba 466 break;
bff988fa 467 case AST_OP_BIT_AND:
2b00d462 468 insn.op = BYTECODE_OP_BIT_AND;
953192ba 469 break;
bff988fa 470 case AST_OP_BIT_OR:
2b00d462 471 insn.op = BYTECODE_OP_BIT_OR;
953192ba 472 break;
bff988fa 473 case AST_OP_BIT_XOR:
2b00d462 474 insn.op = BYTECODE_OP_BIT_XOR;
953192ba
MD
475 break;
476
477 case AST_OP_EQ:
2b00d462 478 insn.op = BYTECODE_OP_EQ;
953192ba
MD
479 break;
480 case AST_OP_NE:
2b00d462 481 insn.op = BYTECODE_OP_NE;
953192ba
MD
482 break;
483 case AST_OP_GT:
2b00d462 484 insn.op = BYTECODE_OP_GT;
953192ba
MD
485 break;
486 case AST_OP_LT:
2b00d462 487 insn.op = BYTECODE_OP_LT;
953192ba
MD
488 break;
489 case AST_OP_GE:
2b00d462 490 insn.op = BYTECODE_OP_GE;
953192ba
MD
491 break;
492 case AST_OP_LE:
2b00d462 493 insn.op = BYTECODE_OP_LE;
953192ba
MD
494 break;
495 }
496 return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
497}
498
8cf9540a
MD
499/*
500 * A logical op always return a s64 (1 or 0).
501 */
953192ba
MD
502static
503int visit_node_logical(struct filter_parser_ctx *ctx, struct ir_op *node)
504{
505 int ret;
506 struct logical_op insn;
507 uint16_t skip_offset_loc;
508 uint16_t target_loc;
509
510 /* Visit left child */
511 ret = recursive_visit_gen_bytecode(ctx, node->u.binary.left);
512 if (ret)
513 return ret;
8cf9540a 514 /* Cast to s64 if float or field ref */
586dc72f 515 if ((node->u.binary.left->data_type == IR_DATA_FIELD_REF
661dfdd1 516 || node->u.binary.left->data_type == IR_DATA_GET_CONTEXT_REF
bff988fa 517 || node->u.binary.left->data_type == IR_DATA_EXPRESSION)
8cf9540a
MD
518 || node->u.binary.left->data_type == IR_DATA_FLOAT) {
519 struct cast_op cast_insn;
520
586dc72f 521 if (node->u.binary.left->data_type == IR_DATA_FIELD_REF
661dfdd1 522 || node->u.binary.left->data_type == IR_DATA_GET_CONTEXT_REF
bff988fa 523 || node->u.binary.left->data_type == IR_DATA_EXPRESSION) {
2b00d462 524 cast_insn.op = BYTECODE_OP_CAST_TO_S64;
29fefef8 525 } else {
2b00d462 526 cast_insn.op = BYTECODE_OP_CAST_DOUBLE_TO_S64;
29fefef8 527 }
8cf9540a
MD
528 ret = bytecode_push(&ctx->bytecode, &cast_insn,
529 1, sizeof(cast_insn));
530 if (ret)
531 return ret;
532 }
953192ba
MD
533 switch (node->u.logical.type) {
534 default:
535 fprintf(stderr, "[error] Unknown node type in %s\n",
536 __func__);
537 return -EINVAL;
538
539 case AST_OP_AND:
2b00d462 540 insn.op = BYTECODE_OP_AND;
953192ba
MD
541 break;
542 case AST_OP_OR:
2b00d462 543 insn.op = BYTECODE_OP_OR;
953192ba
MD
544 break;
545 }
546 insn.skip_offset = (uint16_t) -1UL; /* Temporary */
547 ret = bytecode_push_logical(&ctx->bytecode, &insn, 1, sizeof(insn),
548 &skip_offset_loc);
549 if (ret)
550 return ret;
551 /* Visit right child */
552 ret = recursive_visit_gen_bytecode(ctx, node->u.binary.right);
553 if (ret)
554 return ret;
8cf9540a 555 /* Cast to s64 if float or field ref */
586dc72f 556 if ((node->u.binary.right->data_type == IR_DATA_FIELD_REF
661dfdd1 557 || node->u.binary.right->data_type == IR_DATA_GET_CONTEXT_REF
bff988fa 558 || node->u.binary.right->data_type == IR_DATA_EXPRESSION)
8cf9540a
MD
559 || node->u.binary.right->data_type == IR_DATA_FLOAT) {
560 struct cast_op cast_insn;
561
586dc72f 562 if (node->u.binary.right->data_type == IR_DATA_FIELD_REF
661dfdd1 563 || node->u.binary.right->data_type == IR_DATA_GET_CONTEXT_REF
bff988fa 564 || node->u.binary.right->data_type == IR_DATA_EXPRESSION) {
2b00d462 565 cast_insn.op = BYTECODE_OP_CAST_TO_S64;
29fefef8 566 } else {
2b00d462 567 cast_insn.op = BYTECODE_OP_CAST_DOUBLE_TO_S64;
29fefef8 568 }
8cf9540a
MD
569 ret = bytecode_push(&ctx->bytecode, &cast_insn,
570 1, sizeof(cast_insn));
571 if (ret)
572 return ret;
573 }
953192ba
MD
574 /* We now know where the logical op can skip. */
575 target_loc = (uint16_t) bytecode_get_len(&ctx->bytecode->b);
576 ret = bytecode_patch(&ctx->bytecode,
577 &target_loc, /* Offset to jump to */
578 skip_offset_loc, /* Where to patch */
579 sizeof(uint16_t));
580 return ret;
581}
582
583/*
584 * Postorder traversal of the tree. We need the children result before
585 * we can evaluate the parent.
586 */
587static
588int recursive_visit_gen_bytecode(struct filter_parser_ctx *ctx,
589 struct ir_op *node)
590{
591 switch (node->op) {
592 case IR_OP_UNKNOWN:
593 default:
594 fprintf(stderr, "[error] Unknown node type in %s\n",
595 __func__);
596 return -EINVAL;
597
598 case IR_OP_ROOT:
599 return visit_node_root(ctx, node);
600 case IR_OP_LOAD:
601 return visit_node_load(ctx, node);
602 case IR_OP_UNARY:
603 return visit_node_unary(ctx, node);
604 case IR_OP_BINARY:
605 return visit_node_binary(ctx, node);
606 case IR_OP_LOGICAL:
607 return visit_node_logical(ctx, node);
608 }
609}
610
611void filter_bytecode_free(struct filter_parser_ctx *ctx)
612{
7ca1dc6f
DG
613 if (!ctx) {
614 return;
615 }
616
3f0c8837
DG
617 if (ctx->bytecode) {
618 free(ctx->bytecode);
619 ctx->bytecode = NULL;
620 }
621
622 if (ctx->bytecode_reloc) {
623 free(ctx->bytecode_reloc);
624 ctx->bytecode_reloc = NULL;
625 }
953192ba
MD
626}
627
628int filter_visitor_bytecode_generate(struct filter_parser_ctx *ctx)
629{
630 int ret;
631
632 ret = bytecode_init(&ctx->bytecode);
633 if (ret)
634 return ret;
635 ret = bytecode_init(&ctx->bytecode_reloc);
636 if (ret)
637 goto error;
638 ret = recursive_visit_gen_bytecode(ctx, ctx->ir_root);
639 if (ret)
640 goto error;
641
642 /* Finally, append symbol table to bytecode */
643 ctx->bytecode->b.reloc_table_offset = bytecode_get_len(&ctx->bytecode->b);
644 return bytecode_push(&ctx->bytecode, ctx->bytecode_reloc->b.data,
645 1, bytecode_get_len(&ctx->bytecode_reloc->b));
646
647error:
648 filter_bytecode_free(ctx);
649 return ret;
650}
This page took 0.080288 seconds and 4 git commands to generate.