From 60e1cd0751eec323c62c923629da4abe345e123f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 9 Feb 2017 20:46:44 -0500 Subject: [PATCH] Fix: nmi-safe clock on 32-bit systems On 32-bit systems, the algorithm within lttng-modules that ensures the nmi-safe clock increases monotonically on a CPU assumes to have one clock read per 32-bit LSB overflow period, which is not guaranteed. It also has an issue on the first clock reads after module load, because the initial value for the last LSB is 0. It can cause the time to stay stuck at the same value for a few seconds at the beginning of the trace, which is unfortunate for the first trace after module load, because this is where the offset between realtime and trace_clock is sampled, which prevents correlation of kernel and user-space traces for that session. It only affects 32-bit systems with kernels >= 3.17. Fix this by using the non-nmi-safe clock source on 32-bit systems. While we are there, remove an implementation-defined c99 behavior regarding casting u64 to long by using unsigned arithmetic instead: turn: if (((long) now - (long) last) < 0) into: if (U64_MAX / 2 < now - last) Signed-off-by: Mathieu Desnoyers --- wrapper/trace-clock.c | 6 ++-- wrapper/trace-clock.h | 81 ++++++++++++++----------------------------- 2 files changed, 29 insertions(+), 58 deletions(-) diff --git a/wrapper/trace-clock.c b/wrapper/trace-clock.c index d9bc956a..6ec952b9 100644 --- a/wrapper/trace-clock.c +++ b/wrapper/trace-clock.c @@ -23,10 +23,10 @@ #include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)) -DEFINE_PER_CPU(local_t, lttng_last_tsc); +#ifdef LTTNG_USE_NMI_SAFE_CLOCK +DEFINE_PER_CPU(u64, lttng_last_tsc); EXPORT_PER_CPU_SYMBOL(lttng_last_tsc); -#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)) */ +#endif /* #ifdef LTTNG_USE_NMI_SAFE_CLOCK */ #ifdef LTTNG_CLOCK_NMI_SAFE_BROKEN #warning "Your kernel implements a bogus nmi-safe clock source. Falling back to the non-nmi-safe clock source, which discards events traced from NMI context. Upgrade your kernel to resolve this situation." diff --git a/wrapper/trace-clock.h b/wrapper/trace-clock.h index 496fec43..7f17ccd4 100644 --- a/wrapper/trace-clock.h +++ b/wrapper/trace-clock.h @@ -59,65 +59,36 @@ extern struct lttng_trace_clock *lttng_trace_clock; #define LTTNG_CLOCK_NMI_SAFE_BROKEN #endif +/* + * We need clock values to be monotonically increasing per-cpu, which is + * not strictly guaranteed by ktime_get_mono_fast_ns(). It is + * straightforward to do on architectures with a 64-bit cmpxchg(), but + * not so on architectures without 64-bit cmpxchg. For now, only enable + * this feature on 64-bit architectures. + */ + #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) \ + && BITS_PER_LONG == 64 \ && !defined(LTTNG_CLOCK_NMI_SAFE_BROKEN)) +#define LTTNG_USE_NMI_SAFE_CLOCK +#endif -DECLARE_PER_CPU(local_t, lttng_last_tsc); +#ifdef LTTNG_USE_NMI_SAFE_CLOCK -#if (BITS_PER_LONG == 32) -/* - * Fixup "src_now" using the 32 LSB from "last". We need to handle overflow and - * underflow of the 32nd bit. "last" can be above, below or equal to the 32 LSB - * of "src_now". - */ -static inline u64 trace_clock_fixup(u64 src_now, u32 last) -{ - u64 now; - - now = src_now & 0xFFFFFFFF00000000ULL; - now |= (u64) last; - /* Detect overflow or underflow between now and last. */ - if ((src_now & 0x80000000U) && !(last & 0x80000000U)) { - /* - * If 32nd bit transitions from 1 to 0, and we move forward in - * time from "now" to "last", then we have an overflow. - */ - if (((s32) now - (s32) last) < 0) - now += 0x0000000100000000ULL; - } else if (!(src_now & 0x80000000U) && (last & 0x80000000U)) { - /* - * If 32nd bit transitions from 0 to 1, and we move backward in - * time from "now" to "last", then we have an underflow. - */ - if (((s32) now - (s32) last) > 0) - now -= 0x0000000100000000ULL; - } - return now; -} -#else /* #if (BITS_PER_LONG == 32) */ -/* - * The fixup is pretty easy on 64-bit architectures: "last" is a 64-bit - * value, so we can use last directly as current time. - */ -static inline u64 trace_clock_fixup(u64 src_now, u64 last) -{ - return last; -} -#endif /* #else #if (BITS_PER_LONG == 32) */ +DECLARE_PER_CPU(u64, lttng_last_tsc); /* * Sometimes called with preemption enabled. Can be interrupted. */ static inline u64 trace_clock_monotonic_wrapper(void) { - u64 now; - unsigned long last, result; - local_t *last_tsc; + u64 now, last, result; + u64 *last_tsc_ptr; /* Use fast nmi-safe monotonic clock provided by the Linux kernel. */ preempt_disable(); - last_tsc = lttng_this_cpu_ptr(<tng_last_tsc); - last = local_read(last_tsc); + last_tsc_ptr = lttng_this_cpu_ptr(<tng_last_tsc); + last = *last_tsc_ptr; /* * Read "last" before "now". It is not strictly required, but it ensures * that an interrupt coming in won't artificially trigger a case where @@ -126,9 +97,9 @@ static inline u64 trace_clock_monotonic_wrapper(void) */ barrier(); now = ktime_get_mono_fast_ns(); - if (((long) now - (long) last) < 0) - now = trace_clock_fixup(now, last); - result = local_cmpxchg(last_tsc, last, (unsigned long) now); + if (U64_MAX / 2 < now - last) + now = last; + result = cmpxchg64_local(last_tsc_ptr, last, now); preempt_enable(); if (result == last) { /* Update done. */ @@ -139,11 +110,11 @@ static inline u64 trace_clock_monotonic_wrapper(void) * "result", since it has been sampled concurrently with our * time read, so it should not be far from "now". */ - return trace_clock_fixup(now, result); + return result; } } -#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)) */ +#else /* #ifdef LTTNG_USE_NMI_SAFE_CLOCK */ static inline u64 trace_clock_monotonic_wrapper(void) { ktime_t ktime; @@ -158,7 +129,7 @@ static inline u64 trace_clock_monotonic_wrapper(void) ktime = ktime_get(); return ktime_to_ns(ktime); } -#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)) */ +#endif /* #else #ifdef LTTNG_USE_NMI_SAFE_CLOCK */ static inline u64 trace_clock_read64_monotonic(void) { @@ -185,19 +156,19 @@ static inline const char *trace_clock_description_monotonic(void) return "Monotonic Clock"; } -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)) +#ifdef LTTNG_USE_NMI_SAFE_CLOCK static inline int get_trace_clock(void) { printk_once(KERN_WARNING "LTTng: Using mainline kernel monotonic fast clock, which is NMI-safe.\n"); return 0; } -#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)) */ +#else /* #ifdef LTTNG_USE_NMI_SAFE_CLOCK */ static inline int get_trace_clock(void) { printk_once(KERN_WARNING "LTTng: Using mainline kernel monotonic clock. NMIs will not be traced.\n"); return 0; } -#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)) */ +#endif /* #else #ifdef LTTNG_USE_NMI_SAFE_CLOCK */ static inline void put_trace_clock(void) { -- 2.34.1