From d8ce0927bf851fadc23714ad3a242a347c6431bc Mon Sep 17 00:00:00 2001
From: compudj
Date: Sat, 24 Feb 2007 06:28:35 +0000
Subject: [PATCH] time monotonic

git-svn-id: http://ltt.polymtl.ca/svn@2397 04897980-b3bd-0310-b5e0-8ef037075253
---
 .../doc/developer/time-monotonic-accurate.txt | 110 ++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 ltt/branches/poly/doc/developer/time-monotonic-accurate.txt

diff --git a/ltt/branches/poly/doc/developer/time-monotonic-accurate.txt b/ltt/branches/poly/doc/developer/time-monotonic-accurate.txt
new file mode 100644
index 00000000..c1bc89d4
--- /dev/null
+++ b/ltt/branches/poly/doc/developer/time-monotonic-accurate.txt
@@ -0,0 +1,110 @@
+
+Monotonic accurate time
+
+The goal of this design is to provide a monotonic time with the following
+properties:
+
+Readable from userspace without a system call
+Readable from an NMI handler
+Readable without disabling interrupts
+Readable without disabling preemption
+Based on a single clock source (the most precise available: the TSC)
+Usable on architectures with a variable TSC frequency.
+
+The main difference from the wall time currently implemented in the Linux
+kernel is that the time update is done atomically instead of under a write
+seqlock. This permits reading the time from an NMI handler and from userspace.
+
+struct time_info {
+        u64 tsc;
+        u64 freq;
+        u64 walltime;
+};
+
+struct time_struct {
+        struct time_info time_sel[2];
+        long update_count;
+};
+
+DEFINE_PER_CPU(struct time_struct, cpu_time);
+
+/* On frequency change event */
+/* In irq context */
+void freq_change_cb(unsigned int new_freq)
+{
+        struct time_struct *this_cpu_time =
+                &per_cpu(cpu_time, smp_processor_id());
+        struct time_info *write_time, *current_time;
+
+        write_time =
+                &this_cpu_time->time_sel[(this_cpu_time->update_count + 1) & 1];
+        current_time =
+                &this_cpu_time->time_sel[this_cpu_time->update_count & 1];
+        write_time->tsc = get_cycles();
+        write_time->freq = new_freq;
+        /* We accumulate the division imprecision. This is the downside of
+         * using the TSC with variable frequency as a time base. */
+        write_time->walltime =
+                current_time->walltime +
+                (write_time->tsc - current_time->tsc) / current_time->freq;
+        /* Make sure the new time_info is completely written before readers
+         * can select it. */
+        wmb();
+        this_cpu_time->update_count++;
+}
+
+
+/* Init cpu freq */
+void init_cpu_freq(void)
+{
+        struct time_struct *this_cpu_time =
+                &per_cpu(cpu_time, smp_processor_id());
+        struct time_info *current_time;
+
+        memset(this_cpu_time, 0, sizeof(*this_cpu_time));
+        current_time =
+                &this_cpu_time->time_sel[this_cpu_time->update_count & 1];
+        /* Init current time */
+        /* Get frequency */
+        /* Reset cpus to 0 ns, 0 tsc, start their tsc. */
+}
+
+
+/* After a CPU comes back from hlt */
+/* The trick is to sync the CPUs coming back up on the first CPU that is
+ * already up. If all CPUs are down, then there is no need to increment the
+ * walltime: let's simply define the useful walltime on a machine as the time
+ * elapsed while there is a CPU running. If we want, when no CPU is active, we
+ * can use a lower resolution clock to somehow keep track of walltime. */
+
+void wake_from_hlt(void)
+{
+        /* TODO */
+}
+
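+
+As an illustration of the double-buffer publish scheme described above, here
+is a minimal userspace mock-up. This is not kernel code: pthreads and C11
+atomics stand in for the per-cpu data, get_cycles() and the wmb()/rmb()
+pairing, every name prefixed with mock_ is invented for this sketch, and it
+should build with something like "gcc -std=c11 -pthread". It exercises a
+reader concurrently with frequency changes: the reader always ends up using a
+complete (tsc, freq, walltime) triple selected by update_count & 1, because
+any read that races with a publish is retried. The read side mirrors
+read_time() below.
+
+#include <pthread.h>
+#include <stdatomic.h>
+#include <stdint.h>
+#include <stdio.h>
+
+struct mock_time_info {
+        _Atomic uint64_t tsc;
+        _Atomic uint64_t freq;
+        _Atomic uint64_t walltime;
+};
+
+struct mock_time_struct {
+        struct mock_time_info time_sel[2];
+        _Atomic long update_count;
+};
+
+static struct mock_time_struct mock_time;
+static _Atomic uint64_t mock_tsc;       /* stands in for get_cycles() */
+
+static uint64_t mock_get_cycles(void)
+{
+        return atomic_fetch_add(&mock_tsc, 1000);
+}
+
+/* Mirrors freq_change_cb(): fill the unused buffer, then publish it with a
+ * single counter increment. There is a single updater per structure, as in
+ * the per-cpu kernel case. Sequentially consistent atomics provide the
+ * ordering that wmb()/rmb() provide in the kernel version. */
+static void mock_freq_change(uint64_t new_freq)
+{
+        long count = mock_time.update_count;
+        struct mock_time_info *write_time = &mock_time.time_sel[(count + 1) & 1];
+        struct mock_time_info *current_time = &mock_time.time_sel[count & 1];
+
+        write_time->tsc = mock_get_cycles();
+        write_time->freq = new_freq;
+        write_time->walltime = current_time->walltime +
+                (write_time->tsc - current_time->tsc) / current_time->freq;
+        mock_time.update_count = count + 1;     /* publish the new buffer */
+}
+
+/* Mirrors read_time(): retry if the counter moved while we were reading. */
+static uint64_t mock_read_time(void)
+{
+        uint64_t walltime;
+        long count;
+
+        do {
+                count = mock_time.update_count;
+                struct mock_time_info *cur = &mock_time.time_sel[count & 1];
+
+                walltime = cur->walltime +
+                        (mock_get_cycles() - cur->tsc) / cur->freq;
+        } while (mock_time.update_count != count);
+        return walltime;
+}
+
+static void *mock_reader(void *arg)
+{
+        uint64_t t = 0;
+
+        (void)arg;
+        for (int i = 0; i < 100000; i++)
+                t = mock_read_time();
+        printf("last walltime seen by reader: %llu\n", (unsigned long long)t);
+        return NULL;
+}
+
+int main(void)
+{
+        pthread_t reader;
+
+        mock_time.time_sel[0].freq = 1000;      /* arbitrary initial frequency */
+        pthread_create(&reader, NULL, mock_reader, NULL);
+        for (int i = 0; i < 1000; i++)
+                mock_freq_change(1000 + (i & 7));       /* concurrent updates */
+        pthread_join(reader, NULL);
+        return 0;
+}
+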
+
+/* Read the time from anywhere in the kernel. Returns the walltime, in ns. */
+/* If update_count changes while we read the time_info, the value may be
+ * invalid. This would happen if we are scheduled out for a period of time
+ * long enough to permit 2 frequency changes. We simply restart the loop when
+ * it happens; we detect it by re-reading and comparing the update_count
+ * running counter.
+ */
+u64 read_time(void)
+{
+        u64 walltime;
+        long update_count;
+        struct time_struct *this_cpu_time =
+                &per_cpu(cpu_time, smp_processor_id());
+        struct time_info *current_time;
+
+        do {
+                update_count = this_cpu_time->update_count;
+                /* Pairs with the wmb() in freq_change_cb(). */
+                rmb();
+                current_time = &this_cpu_time->time_sel[update_count & 1];
+                walltime = current_time->walltime +
+                        (get_cycles() - current_time->tsc) /
+                        current_time->freq;
+                rmb();
+        } while (this_cpu_time->update_count != update_count);
+        return walltime;
+}
+
+/* Userspace */
+/* Export all this data to userspace through the vsyscall page. Use a function
+ * like read_time() to read the walltime. It can be implemented as-is because
+ * it does not need to disable preemption. */
+
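+
+For reference, a sketch of what the userspace side could look like, assuming
+the per-cpu time_struct has already been exported read-only to the process
+(the vsyscall/vDSO mapping itself is not shown, read_time_user() and the
+utime_* structure names are invented copies for this sketch, and picking the
+structure of the CPU we are running on is left to the caller). It is
+x86-specific because it reads the TSC directly with __rdtsc(); the retry loop
+is the same as in read_time() above.
+
+#include <stdint.h>
+#include <x86intrin.h>          /* __rdtsc(), x86 only */
+
+/* Userspace copies of the structures above; the layout must match. */
+struct utime_info {
+        uint64_t tsc;
+        uint64_t freq;
+        uint64_t walltime;
+};
+
+struct utime_struct {
+        struct utime_info time_sel[2];
+        long update_count;
+};
+
+/*
+ * 't' points at the exported, read-only time_struct. 'volatile' is a crude
+ * stand-in for the kernel's rmb(): it keeps the compiler from caching the
+ * loads across the retry loop.
+ */
+static uint64_t read_time_user(const volatile struct utime_struct *t)
+{
+        uint64_t walltime;
+        long update_count;
+
+        do {
+                update_count = t->update_count;
+                const volatile struct utime_info *cur =
+                        &t->time_sel[update_count & 1];
+
+                walltime = cur->walltime +
+                        (__rdtsc() - cur->tsc) / cur->freq;
+        } while (t->update_count != update_count);
+        return walltime;
+}
+
-- 
2.34.1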