--- /dev/null
+/*
+ * filter-visitor-generate-bytecode.c
+ *
+ * LTTng filter bytecode generation
+ *
+ * Copyright 2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <common/align.h>
+#include <common/compat/errno.h>
+#include <common/compat/string.h>
+
+#include "common/align.h"
+#include "common/bytecode/bytecode.h"
+#include "common/compat/string.h"
+#include "common/macros.h"
+#include "filter-ast.h"
+#include "filter-ir.h"
+
+#if !defined(max_t)
+/*
+ * Larger of `a` and `b`, with the result converted to `type`.
+ *
+ * The selected argument is evaluated twice, so avoid passing
+ * expressions that have side effects.
+ */
+#define max_t(type, a, b) ((type) (((b) < (a)) ? (a) : (b)))
+#endif
+
+/*
+ * Forward declaration: the per-node visitors below recurse into their
+ * children through this dispatcher, which is defined after them.
+ */
+static int recursive_visit_gen_bytecode(struct filter_parser_ctx *ctx,
+		struct ir_op *node);
+
+/*
+ * Overwrite `len` bytes of the already-emitted bytecode of *fb,
+ * starting at byte `offset`, with the bytes found at `data`.
+ *
+ * The whole [offset, offset + len) range must lie within the bytes
+ * emitted so far ((*fb)->b.len); otherwise nothing is written.
+ *
+ * Returns 0 on success, -EINVAL when the range is out of bounds.
+ */
+static
+int bytecode_patch(struct lttng_bytecode_alloc **fb,
+		const void *data,
+		uint16_t offset,
+		uint32_t len)
+{
+	/*
+	 * Validate the end of the range as well as its start: checking
+	 * only `offset` would let the copy run past the emitted bytes.
+	 * The subtraction cannot wrap because offset < b.len is
+	 * established first.
+	 */
+	if (offset >= (*fb)->b.len || len > (*fb)->b.len - offset) {
+		return -EINVAL;
+	}
+	memcpy(&(*fb)->b.data[offset], data, len);
+	return 0;
+}
+
+/*
+ * Emit the bytecode of the expression hanging under the root node,
+ * followed by the final return instruction.
+ *
+ * Returns 0 on success, or the first non-zero status reported while
+ * visiting the child or pushing the return instruction.
+ */
+static
+int visit_node_root(struct filter_parser_ctx *ctx, struct ir_op *node)
+{
+	struct return_op ret_insn;
+	const int status = recursive_visit_gen_bytecode(ctx, node->u.root.child);
+
+	if (status != 0) {
+		return status;
+	}
+
+	/* The return instruction marks the end of the bytecode. */
+	ret_insn.op = BYTECODE_OP_RETURN;
+	return bytecode_push(&ctx->bytecode, &ret_insn, 1, sizeof(ret_insn));
+}
+
+/*
+ * Replace *s with a newly allocated string made of the previous
+ * content of *s (if any) followed by `append`. The previous buffer is
+ * freed on success and left untouched on failure.
+ *
+ * Returns 0 on success, -ENOMEM when the allocation fails.
+ */
+static
+int append_str(char **s, const char *append)
+{
+	char *const previous = *s;
+	const size_t previous_len = previous ? strlen(previous) : 0;
+	const size_t append_len = strlen(append);
+	char *const joined = (char *) calloc(previous_len + append_len + 1, 1);
+
+	if (joined == NULL) {
+		return -ENOMEM;
+	}
+
+	/* The zero-filled allocation already provides the terminator. */
+	if (previous_len != 0) {
+		memcpy(joined, previous, previous_len);
+	}
+	memcpy(joined + previous_len, append, append_len);
+
+	free(previous);
+	*s = joined;
+	return 0;
+}
+
+/*
+ * Check whether `exp` has the legacy shape: a context, application
+ * context or payload root, followed only by symbol lookups, and
+ * terminated by a field load.
+ *
+ * While walking the expression, *op_type receives the legacy opcode
+ * selected by the root, and the dotted symbol name is accumulated in
+ * *symbol (prefixed with "$ctx." or "$app." for the context roots).
+ * *symbol may hold a partial name when the result is not 1; this
+ * function never releases it.
+ *
+ * Returns:
+ *   1: match
+ *   0: no match
+ * < 0: error
+ */
+static
+int load_expression_legacy_match(const struct ir_load_expression *exp,
+		enum bytecode_op *op_type,
+		char **symbol)
+{
+	const struct ir_load_expression_op *cur = exp->child;
+	const char *root_prefix = NULL;
+	bool need_dot = false;
+
+	/* The first operation selects the opcode and the name prefix. */
+	switch (cur->type) {
+	case IR_LOAD_EXPRESSION_GET_CONTEXT_ROOT:
+		*op_type = BYTECODE_OP_GET_CONTEXT_REF;
+		root_prefix = "$ctx.";
+		break;
+	case IR_LOAD_EXPRESSION_GET_APP_CONTEXT_ROOT:
+		*op_type = BYTECODE_OP_GET_CONTEXT_REF;
+		root_prefix = "$app.";
+		break;
+	case IR_LOAD_EXPRESSION_GET_PAYLOAD_ROOT:
+		/* Payload field names carry no prefix. */
+		*op_type = BYTECODE_OP_LOAD_FIELD_REF;
+		break;
+
+	case IR_LOAD_EXPRESSION_GET_SYMBOL:
+	case IR_LOAD_EXPRESSION_GET_INDEX:
+	case IR_LOAD_EXPRESSION_LOAD_FIELD:
+	default:
+		return 0;	/* no match */
+	}
+
+	if (root_prefix != NULL && append_str(symbol, root_prefix)) {
+		return -ENOMEM;
+	}
+
+	/* Only symbol lookups may sit between the root and the field load. */
+	for (cur = cur->next; cur != NULL; cur = cur->next) {
+		if (cur->type == IR_LOAD_EXPRESSION_LOAD_FIELD) {
+			return 1;	/* Legacy match */
+		}
+		if (cur->type != IR_LOAD_EXPRESSION_GET_SYMBOL) {
+			return 0;	/* no match */
+		}
+
+		/* Symbols after the first one are separated by a dot. */
+		if (need_dot && append_str(symbol, ".")) {
+			return -ENOMEM;
+		}
+		if (append_str(symbol, cur->u.symbol)) {
+			return -ENOMEM;
+		}
+		need_dot = true;
+	}
+
+	/* Ran out of operations before reaching a field load. */
+	return 0;	/* no match */
+}
+
+/*
+ * Try to emit a load expression with the legacy encoding: a single
+ * load_op carrying a field_ref placeholder pushed to ctx->bytecode,
+ * plus a relocation entry (offset of that instruction, then the
+ * NUL-terminated symbol name) pushed to ctx->bytecode_reloc.
+ *
+ * `op` is not used by this function.
+ *
+ * Returns:
+ *   1: legacy match, the expression was emitted
+ *   0: no legacy match, nothing was emitted
+ * < 0: error
+ */
+static
+int visit_node_load_expression_legacy(struct filter_parser_ctx *ctx,
+		const struct ir_load_expression *exp,
+		const struct ir_load_expression_op *op)
+{
+	struct load_op *insn = NULL;
+	uint32_t insn_len = sizeof(struct load_op)
+		+ sizeof(struct field_ref);
+	struct field_ref ref_offset;
+	uint32_t reloc_offset_u32;
+	uint16_t reloc_offset;
+	enum bytecode_op op_type;
+	char *symbol = NULL;
+	int ret;
+
+	ret = load_expression_legacy_match(exp, &op_type, &symbol);
+	if (ret <= 0) {
+		goto end;
+	}
+	if (!symbol) {
+		/*
+		 * A payload root directly followed by a field load
+		 * matches without collecting any symbol. There is no
+		 * name to relocate, so reject the expression instead
+		 * of handing NULL to strlen() below.
+		 */
+		ret = -EINVAL;
+		goto end;
+	}
+	insn = (load_op *) calloc(insn_len, 1);
+	if (!insn) {
+		ret = -ENOMEM;
+		goto end;
+	}
+	insn->op = op_type;
+	/* Placeholder offset stored in the instruction's field_ref. */
+	ref_offset.offset = (uint16_t) -1U;
+	memcpy(insn->data, &ref_offset, sizeof(ref_offset));
+	/* reloc_offset points to struct load_op */
+	reloc_offset_u32 = bytecode_get_len(&ctx->bytecode->b);
+	if (reloc_offset_u32 > LTTNG_FILTER_MAX_LEN - 1) {
+		ret = -EINVAL;
+		goto end;
+	}
+	reloc_offset = (uint16_t) reloc_offset_u32;
+	ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
+	if (ret) {
+		goto end;
+	}
+	/* append reloc */
+	ret = bytecode_push(&ctx->bytecode_reloc, &reloc_offset,
+			1, sizeof(reloc_offset));
+	if (ret) {
+		goto end;
+	}
+	ret = bytecode_push(&ctx->bytecode_reloc, symbol,
+			1, strlen(symbol) + 1);
+	if (ret) {
+		goto end;
+	}
+	ret = 1;		/* legacy */
+end:
+	free(insn);
+	free(symbol);
+	return ret;
+}
+
+/*
+ * Emit the bytecode of a load expression node.
+ *
+ * The legacy encoding is attempted first. When the expression does
+ * not have the legacy shape, one instruction is emitted for each
+ * operation of the expression, in list order.
+ *
+ * Returns 0 on success, -EINVAL when the node carries no expression
+ * or an empty one, -ENOMEM on allocation failure, or the non-zero
+ * status of the first emission that fails.
+ */
+static
+int visit_node_load_expression(struct filter_parser_ctx *ctx,
+		const struct ir_op *node)
+{
+	struct ir_load_expression *expr = node->u.load.u.expression;
+	struct ir_load_expression_op *cur;
+	int status;
+
+	if (expr == NULL || expr->child == NULL) {
+		return -EINVAL;
+	}
+
+	/*
+	 * TODO: if we remove legacy load for application contexts, we
+	 * need to update session bytecode parser as well.
+	 */
+	status = visit_node_load_expression_legacy(ctx, expr, expr->child);
+	if (status != 0) {
+		/* Negative: error. Positive: emitted in legacy form. */
+		return status < 0 ? status : 0;
+	}
+
+	for (cur = expr->child; cur != NULL; cur = cur->next) {
+		switch (cur->type) {
+		case IR_LOAD_EXPRESSION_GET_CONTEXT_ROOT:
+			status = bytecode_push_get_context_root(&ctx->bytecode);
+			break;
+		case IR_LOAD_EXPRESSION_GET_APP_CONTEXT_ROOT:
+			status = bytecode_push_get_app_context_root(
+					&ctx->bytecode);
+			break;
+		case IR_LOAD_EXPRESSION_GET_PAYLOAD_ROOT:
+			status = bytecode_push_get_payload_root(&ctx->bytecode);
+			break;
+		case IR_LOAD_EXPRESSION_GET_SYMBOL:
+			status = bytecode_push_get_symbol(&ctx->bytecode,
+					&ctx->bytecode_reloc, cur->u.symbol);
+			break;
+		case IR_LOAD_EXPRESSION_GET_INDEX:
+			status = bytecode_push_get_index_u64(
+					&ctx->bytecode, cur->u.index);
+			break;
+		case IR_LOAD_EXPRESSION_LOAD_FIELD:
+		{
+			const uint32_t load_len = sizeof(struct load_op);
+			struct load_op *load_insn =
+					(load_op *) calloc(load_len, 1);
+
+			if (load_insn == NULL) {
+				return -ENOMEM;
+			}
+			load_insn->op = BYTECODE_OP_LOAD_FIELD;
+			status = bytecode_push(&ctx->bytecode, load_insn,
+					1, load_len);
+			free(load_insn);
+			break;
+		}
+		}
+
+		/* Stop at the first operation that could not be emitted. */
+		if (status != 0) {
+			return status;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Emit the load instruction matching the data type of `node`: a
+ * string, s64 or double literal stored right after the opcode, or a
+ * load expression (delegated to visit_node_load_expression()).
+ *
+ * Returns 0 on success, -EINVAL for an unknown data type, -ENOMEM on
+ * allocation failure, or the status of the failing emission.
+ */
+static
+int visit_node_load(struct filter_parser_ctx *ctx, struct ir_op *node)
+{
+	struct load_op *insn;
+	enum bytecode_op opcode;
+	const void *payload;
+	size_t payload_len;	/* Bytes copied after the opcode. */
+	uint32_t insn_len;	/* Bytes allocated and pushed. */
+	int ret;
+
+	switch (node->data_type) {
+	case IR_DATA_EXPRESSION:
+		return visit_node_load_expression(ctx, node);
+
+	case IR_DATA_STRING:
+		insn_len = sizeof(struct load_op)
+			+ strlen(node->u.load.u.string.value) + 1;
+		payload = node->u.load.u.string.value;
+		/* The terminating NUL is part of the instruction. */
+		payload_len = strlen(node->u.load.u.string.value) + 1;
+
+		if (node->u.load.u.string.type == IR_LOAD_STRING_TYPE_GLOB_STAR) {
+			/*
+			 * Tell the interpreter explicitly that this load
+			 * is a full star globbing pattern so that the
+			 * appropriate matching function can be called.
+			 */
+			opcode = BYTECODE_OP_LOAD_STAR_GLOB_STRING;
+		} else {
+			/*
+			 * "Legacy" string, which includes star globbing
+			 * patterns with a star only at the end. Both
+			 * "plain" and "star at the end" literal strings
+			 * are handled at the same place by the tracer's
+			 * filter bytecode interpreter, whereas full star
+			 * globbing patterns (stars can be anywhere in
+			 * the string) are a special case.
+			 */
+			opcode = BYTECODE_OP_LOAD_STRING;
+		}
+		break;
+
+	case IR_DATA_NUMERIC:
+		insn_len = sizeof(struct load_op)
+			+ sizeof(struct literal_numeric);
+		payload = &node->u.load.u.num;
+		payload_len = sizeof(int64_t);
+		opcode = BYTECODE_OP_LOAD_S64;
+		break;
+
+	case IR_DATA_FLOAT:
+		insn_len = sizeof(struct load_op)
+			+ sizeof(struct literal_double);
+		payload = &node->u.load.u.flt;
+		payload_len = sizeof(double);
+		opcode = BYTECODE_OP_LOAD_DOUBLE;
+		break;
+
+	case IR_DATA_UNKNOWN:
+	default:
+		fprintf(stderr, "[error] Unknown data type in %s\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	/* Common tail: build the instruction, push it, release it. */
+	insn = (load_op *) calloc(insn_len, 1);
+	if (insn == NULL) {
+		return -ENOMEM;
+	}
+	insn->op = opcode;
+	memcpy(insn->data, payload, payload_len);
+	ret = bytecode_push(&ctx->bytecode, insn, 1, insn_len);
+	free(insn);
+	return ret;
+}
+
+/*
+ * Emit the bytecode of a unary node: the operand first, then the
+ * unary instruction. A unary plus emits nothing beyond its operand.
+ *
+ * Returns 0 on success, -EINVAL for an unknown unary type, or the
+ * non-zero status of the failing emission.
+ */
+static
+int visit_node_unary(struct filter_parser_ctx *ctx, struct ir_op *node)
+{
+	struct unary_op unary_insn;
+	const int status = recursive_visit_gen_bytecode(ctx, node->u.unary.child);
+
+	if (status != 0) {
+		return status;
+	}
+
+	/* Select the opcode applied to the operand emitted above. */
+	switch (node->u.unary.type) {
+	case AST_UNARY_PLUS:
+		/* Nothing to do. */
+		return 0;
+	case AST_UNARY_MINUS:
+		unary_insn.op = BYTECODE_OP_UNARY_MINUS;
+		break;
+	case AST_UNARY_NOT:
+		unary_insn.op = BYTECODE_OP_UNARY_NOT;
+		break;
+	case AST_UNARY_BIT_NOT:
+		unary_insn.op = BYTECODE_OP_UNARY_BIT_NOT;
+		break;
+	case AST_UNARY_UNKNOWN:
+	default:
+		fprintf(stderr, "[error] Unknown unary node type in %s\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	return bytecode_push(&ctx->bytecode, &unary_insn, 1, sizeof(unary_insn));
+}
+
+/*
+ * Emit the bytecode of a binary node: left operand, right operand,
+ * then the arithmetic, bitwise or comparison instruction.
+ *
+ * Binary comparator nesting is disallowed. This allows fitting into
+ * only 2 registers.
+ *
+ * Logical AND/OR are not accepted here; they are handled by
+ * visit_node_logical().
+ *
+ * Returns 0 on success, -EINVAL for an unknown or logical operator,
+ * or the non-zero status of the failing emission.
+ */
+static
+int visit_node_binary(struct filter_parser_ctx *ctx, struct ir_op *node)
+{
+	int ret;
+	struct binary_op insn;
+
+	/* Visit both children before emitting the operator. */
+	ret = recursive_visit_gen_bytecode(ctx, node->u.binary.left);
+	if (ret)
+		return ret;
+	ret = recursive_visit_gen_bytecode(ctx, node->u.binary.right);
+	if (ret)
+		return ret;
+
+	switch (node->u.binary.type) {
+	case AST_OP_UNKNOWN:
+	default:
+		/* This is the binary visitor: report the right node kind. */
+		fprintf(stderr, "[error] Unknown binary node type in %s\n",
+			__func__);
+		return -EINVAL;
+
+	case AST_OP_AND:
+	case AST_OP_OR:
+		fprintf(stderr, "[error] Unexpected logical node type in %s\n",
+			__func__);
+		return -EINVAL;
+
+	case AST_OP_MUL:
+		insn.op = BYTECODE_OP_MUL;
+		break;
+	case AST_OP_DIV:
+		insn.op = BYTECODE_OP_DIV;
+		break;
+	case AST_OP_MOD:
+		insn.op = BYTECODE_OP_MOD;
+		break;
+	case AST_OP_PLUS:
+		insn.op = BYTECODE_OP_PLUS;
+		break;
+	case AST_OP_MINUS:
+		insn.op = BYTECODE_OP_MINUS;
+		break;
+	case AST_OP_BIT_RSHIFT:
+		insn.op = BYTECODE_OP_BIT_RSHIFT;
+		break;
+	case AST_OP_BIT_LSHIFT:
+		insn.op = BYTECODE_OP_BIT_LSHIFT;
+		break;
+	case AST_OP_BIT_AND:
+		insn.op = BYTECODE_OP_BIT_AND;
+		break;
+	case AST_OP_BIT_OR:
+		insn.op = BYTECODE_OP_BIT_OR;
+		break;
+	case AST_OP_BIT_XOR:
+		insn.op = BYTECODE_OP_BIT_XOR;
+		break;
+
+	case AST_OP_EQ:
+		insn.op = BYTECODE_OP_EQ;
+		break;
+	case AST_OP_NE:
+		insn.op = BYTECODE_OP_NE;
+		break;
+	case AST_OP_GT:
+		insn.op = BYTECODE_OP_GT;
+		break;
+	case AST_OP_LT:
+		insn.op = BYTECODE_OP_LT;
+		break;
+	case AST_OP_GE:
+		insn.op = BYTECODE_OP_GE;
+		break;
+	case AST_OP_LE:
+		insn.op = BYTECODE_OP_LE;
+		break;
+	}
+	return bytecode_push(&ctx->bytecode, &insn, 1, sizeof(insn));
+}
+
+/*
+ * Emit a cast-to-s64 instruction when the data type of `child` calls
+ * for one: field refs, context refs and expressions get the generic
+ * cast, floats get the double-to-s64 cast, anything else gets none.
+ *
+ * Returns 0 when no cast is needed or the cast was emitted, otherwise
+ * the non-zero status of bytecode_push().
+ */
+static
+int push_cast_to_s64_if_needed(struct filter_parser_ctx *ctx,
+		const struct ir_op *child)
+{
+	struct cast_op cast_insn;
+
+	switch (child->data_type) {
+	case IR_DATA_FIELD_REF:
+	case IR_DATA_GET_CONTEXT_REF:
+	case IR_DATA_EXPRESSION:
+		cast_insn.op = BYTECODE_OP_CAST_TO_S64;
+		break;
+	case IR_DATA_FLOAT:
+		cast_insn.op = BYTECODE_OP_CAST_DOUBLE_TO_S64;
+		break;
+	default:
+		return 0;
+	}
+	return bytecode_push(&ctx->bytecode, &cast_insn,
+			1, sizeof(cast_insn));
+}
+
+/*
+ * Emit the bytecode of a logical AND/OR node: left operand, logical
+ * instruction with a skip offset, right operand. The skip offset is
+ * first emitted as a placeholder and patched, once the right operand
+ * has been emitted, with the bytecode length reached at that point.
+ *
+ * A logical op always return a s64 (1 or 0).
+ *
+ * Returns 0 on success, -EINVAL for an unknown logical type or when
+ * the jump target does not fit the 16-bit skip offset, or the
+ * non-zero status of the failing emission.
+ */
+static
+int visit_node_logical(struct filter_parser_ctx *ctx, struct ir_op *node)
+{
+	int ret;
+	struct logical_op insn;
+	uint16_t skip_offset_loc;
+	uint32_t target_loc_u32;
+	uint16_t target_loc;
+
+	/* Visit left child */
+	ret = recursive_visit_gen_bytecode(ctx, node->u.binary.left);
+	if (ret)
+		return ret;
+	/* Cast to s64 if float or field ref */
+	ret = push_cast_to_s64_if_needed(ctx, node->u.binary.left);
+	if (ret)
+		return ret;
+
+	switch (node->u.logical.type) {
+	default:
+		fprintf(stderr, "[error] Unknown node type in %s\n",
+			__func__);
+		return -EINVAL;
+
+	case AST_OP_AND:
+		insn.op = BYTECODE_OP_AND;
+		break;
+	case AST_OP_OR:
+		insn.op = BYTECODE_OP_OR;
+		break;
+	}
+	insn.skip_offset = (uint16_t) -1UL;	/* Temporary */
+	ret = bytecode_push_logical(&ctx->bytecode, &insn, 1, sizeof(insn),
+			&skip_offset_loc);
+	if (ret)
+		return ret;
+
+	/* Visit right child */
+	ret = recursive_visit_gen_bytecode(ctx, node->u.binary.right);
+	if (ret)
+		return ret;
+	/* Cast to s64 if float or field ref */
+	ret = push_cast_to_s64_if_needed(ctx, node->u.binary.right);
+	if (ret)
+		return ret;
+
+	/*
+	 * We now know where the logical op can skip. Refuse a target
+	 * that does not survive the narrowing to 16 bits rather than
+	 * patching in a truncated jump offset.
+	 */
+	target_loc_u32 = bytecode_get_len(&ctx->bytecode->b);
+	target_loc = (uint16_t) target_loc_u32;
+	if (target_loc != target_loc_u32) {
+		return -EINVAL;
+	}
+	return bytecode_patch(&ctx->bytecode,
+			&target_loc,		/* Offset to jump to */
+			skip_offset_loc,	/* Where to patch */
+			sizeof(uint16_t));
+}
+
+/*
+ * Dispatch `node` to the visitor matching its operation kind.
+ *
+ * The tree is walked in postorder: every visitor emits the bytecode
+ * of its children before its own instruction, because the children's
+ * results are needed to evaluate the parent.
+ *
+ * Returns the visitor's status, or -EINVAL for an unknown node kind.
+ */
+static
+int recursive_visit_gen_bytecode(struct filter_parser_ctx *ctx,
+		struct ir_op *node)
+{
+	switch (node->op) {
+	case IR_OP_ROOT:
+		return visit_node_root(ctx, node);
+	case IR_OP_LOAD:
+		return visit_node_load(ctx, node);
+	case IR_OP_UNARY:
+		return visit_node_unary(ctx, node);
+	case IR_OP_BINARY:
+		return visit_node_binary(ctx, node);
+	case IR_OP_LOGICAL:
+		return visit_node_logical(ctx, node);
+
+	case IR_OP_UNKNOWN:
+	default:
+		break;
+	}
+
+	fprintf(stderr, "[error] Unknown node type in %s\n",
+		__func__);
+	return -EINVAL;
+}
+
+/*
+ * Release the bytecode and relocation buffers owned by `ctx` and
+ * reset both pointers to NULL. A NULL `ctx` is accepted and ignored.
+ */
+void filter_bytecode_free(struct filter_parser_ctx *ctx)
+{
+	if (ctx == NULL) {
+		return;
+	}
+
+	/* free(NULL) is a no-op, so no guard is needed on either buffer. */
+	free(ctx->bytecode);
+	ctx->bytecode = NULL;
+
+	free(ctx->bytecode_reloc);
+	ctx->bytecode_reloc = NULL;
+}
+
+/*
+ * Generate the filter bytecode for the IR tree rooted at
+ * ctx->ir_root.
+ *
+ * On success, ctx->bytecode holds the instructions followed by the
+ * relocation (symbol) table, whose start is recorded in
+ * reloc_table_offset; ctx->bytecode_reloc is left allocated and is
+ * released by filter_bytecode_free().
+ *
+ * Returns 0 on success. On failure, returns the non-zero status of
+ * the failing step; every failure after the first buffer has been
+ * initialized releases both buffers through filter_bytecode_free().
+ */
+int filter_visitor_bytecode_generate(struct filter_parser_ctx *ctx)
+{
+	int ret;
+
+	ret = bytecode_init(&ctx->bytecode);
+	if (ret)
+		return ret;
+	ret = bytecode_init(&ctx->bytecode_reloc);
+	if (ret)
+		goto error;
+	ret = recursive_visit_gen_bytecode(ctx, ctx->ir_root);
+	if (ret)
+		goto error;
+
+	/* Finally, append symbol table to bytecode */
+	ctx->bytecode->b.reloc_table_offset = bytecode_get_len(&ctx->bytecode->b);
+	ret = bytecode_push(&ctx->bytecode, ctx->bytecode_reloc->b.data,
+			1, bytecode_get_len(&ctx->bytecode_reloc->b));
+	if (ret) {
+		/* Clean up like every other failure path does. */
+		goto error;
+	}
+	return 0;
+
+error:
+	filter_bytecode_free(ctx);
+	return ret;
+}