Add support for kvm x86 specific tracepoints
[lttng-modules.git] / lttng-syscalls.c
index 28348abe350a7e3de15f2d0824745d6a11e42963..62ed24aa66723c2cb16b8abe2a83cd6b5a8ccb48 100644 (file)
 /*
  * lttng-syscalls.c
  *
- * Copyright 2010 (c) - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * LTTng syscall probes.
  *
- * LTTng sched probes.
+ * Copyright (C) 2010-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  *
- * Dual LGPL v2.1/GPL v2 license.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; only
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/compat.h>
 #include <asm/ptrace.h>
 #include <asm/syscall.h>
 
-#include "ltt-events.h"
+#include "wrapper/tracepoint.h"
+#include "lttng-events.h"
+
+#ifndef CONFIG_COMPAT
+# ifndef is_compat_task
+#  define is_compat_task()     (0)
+# endif
+#endif
+
+static
+void syscall_entry_probe(void *__data, struct pt_regs *regs, long id);
+
+/*
+ * Forward declarations for old kernels.
+ */
+struct mmsghdr;
+struct rlimit64;
+struct oldold_utsname;
+struct old_utsname;
+struct sel_arg_struct;
+struct mmap_arg_struct;
 
-static void syscall_entry_probe(void *__data, struct pt_regs *regs, long id);
+/*
+ * Define the NOARGS macro variants as empty: mainline does not support them.
+ */
+#define DECLARE_EVENT_CLASS_NOARGS(name, tstruct, assign, print)
+#define DEFINE_EVENT_NOARGS(template, name)
+#define TRACE_EVENT_NOARGS(name, struct, assign, print)
 
 /*
  * Create LTTng tracepoint probes.
  */
 #define LTTNG_PACKAGE_BUILD
 #define CREATE_TRACE_POINTS
+#define TP_MODULE_NOINIT
+#define TRACE_INCLUDE_PATH ../instrumentation/syscalls/headers
+
+#define PARAMS(args...)        args
 
 /* Hijack probe callback for system calls */
+#undef TP_PROBE_CB
 #define TP_PROBE_CB(_template)         &syscall_entry_probe
-#define TP_MODULE_OVERRIDE
-
-#define TRACE_INCLUDE_PATH ../instrumentation/syscalls/headers
+#define SC_TRACE_EVENT(_name, _proto, _args, _struct, _assign, _printk)        \
+       TRACE_EVENT(_name, PARAMS(_proto), PARAMS(_args),\
+               PARAMS(_struct), PARAMS(_assign), PARAMS(_printk))
+#define SC_DECLARE_EVENT_CLASS_NOARGS(_name, _struct, _assign, _printk)        \
+       DECLARE_EVENT_CLASS_NOARGS(_name, PARAMS(_struct), PARAMS(_assign),\
+               PARAMS(_printk))
+#define SC_DEFINE_EVENT_NOARGS(_template, _name)                       \
+       DEFINE_EVENT_NOARGS(_template, _name)
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM syscalls_integers
+#include "instrumentation/syscalls/headers/syscalls_integers.h"
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM syscalls_pointers
+#include "instrumentation/syscalls/headers/syscalls_pointers.h"
+#undef TRACE_SYSTEM
+#undef SC_TRACE_EVENT
+#undef SC_DECLARE_EVENT_CLASS_NOARGS
+#undef SC_DEFINE_EVENT_NOARGS
+
+#define TRACE_SYSTEM syscalls_unknown
+#include "instrumentation/syscalls/headers/syscalls_unknown.h"
+#undef TRACE_SYSTEM
+
+/* For compat syscalls */
+#undef _TRACE_SYSCALLS_integers_H
+#undef _TRACE_SYSCALLS_pointers_H
 
-#include "instrumentation/syscalls/headers/syscalls.h"
-
-#undef TP_MODULE_OVERRIDE
+/* Hijack probe callback for system calls */
+#undef TP_PROBE_CB
+#define TP_PROBE_CB(_template)         &syscall_entry_probe
+#define SC_TRACE_EVENT(_name, _proto, _args, _struct, _assign, _printk)        \
+       TRACE_EVENT(compat_##_name, PARAMS(_proto), PARAMS(_args),      \
+               PARAMS(_struct), PARAMS(_assign),                       \
+               PARAMS(_printk))
+#define SC_DECLARE_EVENT_CLASS_NOARGS(_name, _struct, _assign, _printk) \
+       DECLARE_EVENT_CLASS_NOARGS(compat_##_name, PARAMS(_struct),     \
+               PARAMS(_assign), PARAMS(_printk))
+#define SC_DEFINE_EVENT_NOARGS(_template, _name)                       \
+       DEFINE_EVENT_NOARGS(compat_##_template, compat_##_name)
+#define TRACE_SYSTEM compat_syscalls_integers
+#include "instrumentation/syscalls/headers/compat_syscalls_integers.h"
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM compat_syscalls_pointers
+#include "instrumentation/syscalls/headers/compat_syscalls_pointers.h"
+#undef TRACE_SYSTEM
+#undef SC_TRACE_EVENT
+#undef SC_DECLARE_EVENT_CLASS_NOARGS
+#undef SC_DEFINE_EVENT_NOARGS
 #undef TP_PROBE_CB
+
+#undef TP_MODULE_NOINIT
 #undef LTTNG_PACKAGE_BUILD
 #undef CREATE_TRACE_POINTS
 
 struct trace_syscall_entry {
        void *func;
-       const struct lttng_event_desc *desc;    /* Set dynamically */
+       const struct lttng_event_desc *desc;
        const struct lttng_event_field *fields;
        unsigned int nrargs;
 };
 
-static int sc_table_desc_filled;
-
 #define CREATE_SYSCALL_TABLE
 
 #undef TRACE_SYSCALL_TABLE
-#define TRACE_SYSCALL_TABLE(_name, _nr, _nrargs)               \
+#define TRACE_SYSCALL_TABLE(_template, _name, _nr, _nrargs)    \
+       [ _nr ] = {                                             \
+               .func = __event_probe__##_template,             \
+               .nrargs = (_nrargs),                            \
+               .fields = __event_fields___##_template,         \
+               .desc = &__event_desc___##_name,                \
+       },
+
+static const struct trace_syscall_entry sc_table[] = {
+#include "instrumentation/syscalls/headers/syscalls_integers.h"
+#include "instrumentation/syscalls/headers/syscalls_pointers.h"
+};
+
+#undef TRACE_SYSCALL_TABLE
+#define TRACE_SYSCALL_TABLE(_template, _name, _nr, _nrargs)    \
        [ _nr ] = {                                             \
-               .func = __event_probe__##_name,                 \
+               .func = __event_probe__##compat_##_template,    \
                .nrargs = (_nrargs),                            \
-               .fields = __event_fields___##_name,             \
+               .fields = __event_fields___##compat_##_template,\
+               .desc = &__event_desc___##compat_##_name,       \
        },
 
-static struct trace_syscall_entry sc_table[] = {
-#include "instrumentation/syscalls/headers/syscalls.h"
+/* Create compatibility syscall table */
+const struct trace_syscall_entry compat_sc_table[] = {
+#include "instrumentation/syscalls/headers/compat_syscalls_integers.h"
+#include "instrumentation/syscalls/headers/compat_syscalls_pointers.h"
 };
 
 #undef CREATE_SYSCALL_TABLE
 
-static void syscall_entry_probe(void *__data, struct pt_regs *regs, long id)
+static void syscall_entry_unknown(struct lttng_event *event,
+       struct pt_regs *regs, unsigned int id)
 {
-       struct trace_syscall_entry *entry;
-       struct ltt_channel *chan = __data;
-       struct ltt_event *event;
+       unsigned long args[UNKNOWN_SYSCALL_NRARGS];
 
-       if (unlikely(id >= ARRAY_SIZE(sc_table)))
+       syscall_get_arguments(current, regs, 0, UNKNOWN_SYSCALL_NRARGS, args);
+       if (unlikely(is_compat_task()))
+               __event_probe__compat_sys_unknown(event, id, args);
+       else
+               __event_probe__sys_unknown(event, id, args);
+}
+
+void syscall_entry_probe(void *__data, struct pt_regs *regs, long id)
+{
+       struct lttng_channel *chan = __data;
+       struct lttng_event *event, *unknown_event;
+       const struct trace_syscall_entry *table, *entry;
+       size_t table_len;
+
+       if (unlikely(is_compat_task())) {
+               table = compat_sc_table;
+               table_len = ARRAY_SIZE(compat_sc_table);
+               unknown_event = chan->sc_compat_unknown;
+       } else {
+               table = sc_table;
+               table_len = ARRAY_SIZE(sc_table);
+               unknown_event = chan->sc_unknown;
+       }
+       if (unlikely(id >= table_len)) {
+               syscall_entry_unknown(unknown_event, regs, id);
                return;
-       entry = &sc_table[id];
-       if (unlikely(!entry->func))
+       }
+       if (unlikely(is_compat_task()))
+               event = chan->compat_sc_table[id];
+       else
+               event = chan->sc_table[id];
+       if (unlikely(!event)) {
+               syscall_entry_unknown(unknown_event, regs, id);
                return;
-       event = chan->sc_table[id];
-       WARN_ON_ONCE(!event);
+       }
+       entry = &table[id];
+       WARN_ON_ONCE(!entry);
 
        switch (entry->nrargs) {
        case 0:
@@ -163,104 +293,167 @@ static void syscall_entry_probe(void *__data, struct pt_regs *regs, long id)
        }
 }
 
-static const struct lttng_event_desc *find_syscall_desc(unsigned int id)
+/* Meant to be noinline to diminish caller stack size; FIXME: not annotated */
+static
+int fill_table(const struct trace_syscall_entry *table, size_t table_len,
+       struct lttng_event **chan_table, struct lttng_channel *chan, void *filter)
 {
+       const struct lttng_event_desc *desc;
        unsigned int i;
 
-       for (i = 0; i < __probe_desc___syscalls.nr_events; i++) {
-               if (__probe_desc___syscalls.event_desc[i].fields
-                               == sc_table[id].fields)
-                       return &__probe_desc___syscalls.event_desc[i];
-       }
-       WARN_ON_ONCE(1);
-       return NULL;
-}
-
-static void fill_sc_table_desc(void)
-{
-       unsigned int i;
-
-       if (sc_table_desc_filled)
-               return;
-       /*
-        * This is O(n^2), but rare. Eventually get the TRACE_EVENT code
-        * to emit per-event symbols to skip this.
-        */
-       for (i = 0; i < ARRAY_SIZE(sc_table); i++) {
-               const struct lttng_event_desc **desc = &sc_table[i].desc;
+       /* Allocate events for each syscall, insert into table */
+       for (i = 0; i < table_len; i++) {
+               struct lttng_kernel_event ev;
+               desc = table[i].desc;
 
-               if (!sc_table[i].func)
+               if (!desc) {
+                       /* Unknown syscall */
                        continue;
-               (*desc) = find_syscall_desc(i);
+               }
+               /*
+                * Skip entries already populated by a previous failed
+                * registration attempt for this channel.
+                */
+               if (chan_table[i])
+                       continue;
+               memset(&ev, 0, sizeof(ev));
+               strncpy(ev.name, desc->name, LTTNG_KERNEL_SYM_NAME_LEN);
+               ev.name[LTTNG_KERNEL_SYM_NAME_LEN - 1] = '\0';
+               ev.instrumentation = LTTNG_KERNEL_NOOP;
+               chan_table[i] = lttng_event_create(chan, &ev, filter,
+                                               desc);
+               if (!chan_table[i]) {
+                       /*
+                        * If something goes wrong in event registration
+                        * after the first one, we have no choice but to
+                        * leave the previous events in there, until
+                        * deleted by session teardown.
+                        */
+                       return -EINVAL;
+               }
        }
-       sc_table_desc_filled = 1;
+       return 0;
 }
 
-
-int lttng_syscalls_register(struct ltt_channel *chan, void *filter)
+int lttng_syscalls_register(struct lttng_channel *chan, void *filter)
 {
-       unsigned int i;
+       struct lttng_kernel_event ev;
        int ret;
 
        wrapper_vmalloc_sync_all();
-       fill_sc_table_desc();
 
        if (!chan->sc_table) {
                /* create syscall table mapping syscall to events */
-               chan->sc_table = kzalloc(sizeof(struct ltt_event *)
+               chan->sc_table = kzalloc(sizeof(struct lttng_event *)
                                        * ARRAY_SIZE(sc_table), GFP_KERNEL);
                if (!chan->sc_table)
                        return -ENOMEM;
        }
 
-       /* Allocate events for each syscall, insert into table */
-       for (i = 0; i < ARRAY_SIZE(sc_table); i++) {
-               struct lttng_kernel_event ev;
-               const struct lttng_event_desc *desc = sc_table[i].desc;
+#ifdef CONFIG_COMPAT
+       if (!chan->compat_sc_table) {
+               /* create syscall table mapping compat syscall to events */
+               chan->compat_sc_table = kzalloc(sizeof(struct lttng_event *)
+                                       * ARRAY_SIZE(compat_sc_table), GFP_KERNEL);
+               if (!chan->compat_sc_table)
+                       return -ENOMEM;
+       }
+#endif
+       if (!chan->sc_unknown) {
+               const struct lttng_event_desc *desc =
+                       &__event_desc___sys_unknown;
 
-               if (!desc)
-                       continue;
-               /*
-                * Skip those already populated by previous failed
-                * register for this channel.
-                */
-               if (chan->sc_table[i])
-                       continue;
                memset(&ev, 0, sizeof(ev));
-               strncpy(ev.name, desc->name, LTTNG_SYM_NAME_LEN);
-               ev.name[LTTNG_SYM_NAME_LEN - 1] = '\0';
+               strncpy(ev.name, desc->name, LTTNG_KERNEL_SYM_NAME_LEN);
+               ev.name[LTTNG_KERNEL_SYM_NAME_LEN - 1] = '\0';
                ev.instrumentation = LTTNG_KERNEL_NOOP;
-               chan->sc_table[i] = ltt_event_create(chan, &ev, filter,
-                                                    desc);
-               if (!chan->sc_table[i]) {
-                       /*
-                        * If something goes wrong in event registration
-                        * after the first one, we have no choice but to
-                        * leave the previous events in there, until
-                        * deleted by session teardown.
-                        */
+               chan->sc_unknown = lttng_event_create(chan, &ev, filter,
+                                                   desc);
+               if (!chan->sc_unknown) {
                        return -EINVAL;
                }
        }
-       ret = tracepoint_probe_register("sys_enter",
+
+       if (!chan->sc_compat_unknown) {
+               const struct lttng_event_desc *desc =
+                       &__event_desc___compat_sys_unknown;
+
+               memset(&ev, 0, sizeof(ev));
+               strncpy(ev.name, desc->name, LTTNG_KERNEL_SYM_NAME_LEN);
+               ev.name[LTTNG_KERNEL_SYM_NAME_LEN - 1] = '\0';
+               ev.instrumentation = LTTNG_KERNEL_NOOP;
+               chan->sc_compat_unknown = lttng_event_create(chan, &ev, filter,
+                                                          desc);
+               if (!chan->sc_compat_unknown) {
+                       return -EINVAL;
+               }
+       }
+
+       if (!chan->sc_exit) {
+               const struct lttng_event_desc *desc =
+                       &__event_desc___exit_syscall;
+
+               memset(&ev, 0, sizeof(ev));
+               strncpy(ev.name, desc->name, LTTNG_KERNEL_SYM_NAME_LEN);
+               ev.name[LTTNG_KERNEL_SYM_NAME_LEN - 1] = '\0';
+               ev.instrumentation = LTTNG_KERNEL_NOOP;
+               chan->sc_exit = lttng_event_create(chan, &ev, filter,
+                                                desc);
+               if (!chan->sc_exit) {
+                       return -EINVAL;
+               }
+       }
+
+       ret = fill_table(sc_table, ARRAY_SIZE(sc_table),
+                       chan->sc_table, chan, filter);
+       if (ret)
+               return ret;
+#ifdef CONFIG_COMPAT
+       ret = fill_table(compat_sc_table, ARRAY_SIZE(compat_sc_table),
+                       chan->compat_sc_table, chan, filter);
+       if (ret)
+               return ret;
+#endif
+       ret = kabi_2635_tracepoint_probe_register("sys_enter",
                        (void *) syscall_entry_probe, chan);
+       if (ret)
+               return ret;
+       /*
+        * The sys_exit tracepoint uses the exit_syscall probe name to avoid
+        * a namespace conflict with the entry probe of the sys_exit syscall.
+        */
+       ret = kabi_2635_tracepoint_probe_register("sys_exit",
+                       (void *) __event_probe__exit_syscall,
+                       chan->sc_exit);
+       if (ret) {
+               WARN_ON_ONCE(kabi_2635_tracepoint_probe_unregister("sys_enter",
+                       (void *) syscall_entry_probe, chan));
+       }
        return ret;
 }
 
 /*
  * Only called at session destruction.
  */
-int lttng_syscalls_unregister(struct ltt_channel *chan)
+int lttng_syscalls_unregister(struct lttng_channel *chan)
 {
        int ret;
 
        if (!chan->sc_table)
                return 0;
-       ret = tracepoint_probe_unregister("sys_enter",
+       ret = kabi_2635_tracepoint_probe_unregister("sys_exit",
+                       (void *) __event_probe__exit_syscall,
+                       chan->sc_exit);
+       if (ret)
+               return ret;
+       ret = kabi_2635_tracepoint_probe_unregister("sys_enter",
                        (void *) syscall_entry_probe, chan);
        if (ret)
                return ret;
-       /* ltt_event destroy will be performed by ltt_session_destroy() */
+       /* lttng_event destroy will be performed by lttng_session_destroy() */
        kfree(chan->sc_table);
+#ifdef CONFIG_COMPAT
+       kfree(chan->compat_sc_table);
+#endif
        return 0;
 }
This page took 0.033999 seconds and 4 git commands to generate.