time monotonic

author compudj <compudj@04897980-b3bd-0310-b5e0-8ef037075253>

Sat, 24 Feb 2007 06:28:35 +0000 (06:28 +0000)

committer compudj <compudj@04897980-b3bd-0310-b5e0-8ef037075253>

Sat, 24 Feb 2007 06:28:35 +0000 (06:28 +0000)
author compudj <compudj@04897980-b3bd-0310-b5e0-8ef037075253>
Sat, 24 Feb 2007 06:28:35 +0000 (06:28 +0000)
committer compudj <compudj@04897980-b3bd-0310-b5e0-8ef037075253>
Sat, 24 Feb 2007 06:28:35 +0000 (06:28 +0000)
diff --git a/ltt/branches/poly/doc/developer/time-monotonic-accurate.txt b/ltt/branches/poly/doc/developer/time-monotonic-accurate.txt

new file mode 100644 (file)

index 0000000..c1bc89d
--- /dev/null
+++ b/ltt/branches/poly/doc/developer/time-monotonic-accurate.txt
@@ -0,0 +1,110 @@
+
+Monotonic accurate time
+
+The goal of this design is to provide a monotonic time with these properties:
+
+Readable from userspace without a system call
+Readable from NMI handler
+Readable without disabling interrupts
+Readable without disabling preemption
+Only one clock source (most precise available : tsc)
+Support architectures with variable TSC frequency.
+
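+The main difference from the wall time currently implemented in the Linux
+kernel: the time update is done atomically instead of using a write seqlock.
+A seqlock reader has to spin while an update is in progress, which never ends
+if the reader is an NMI handler that interrupted the writer. Updating
+atomically permits reading time from NMI handler and from userspace.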
+
+/*
+ * One snapshot of a CPU's time base.
+ *
+ * tsc      : cycle counter value when the snapshot was taken.
+ * freq     : frequency used to convert the cycles elapsed since tsc into time.
+ * walltime : time accumulated up to the moment the snapshot was taken.
+ */
+struct time_info {
+       u64 tsc;
+       u64 freq;
+       u64 walltime;
+};
+
+/*
+ * Double buffer of time snapshots for one CPU.
+ *
+ * time_sel[update_count & 1] is the snapshot readers use. An update fills in
+ * the other slot and then increments update_count to make it the current one.
+ */
+struct time_struct {
+       struct time_info time_sel[2];
+       long update_count;
+};
+
+/* One time base per CPU, accessed with per_cpu(cpu_time, cpu). */
+DEFINE_PER_CPU(struct time_struct, cpu_time);
+
+/* On frequency change event */
+/* In irq context */
+/*
+ * Updates the time base of the CPU this runs on.
+ *
+ * new_freq : frequency to use for converting cycles to time from now on.
+ *
+ * The new snapshot is written into the time_sel[] slot that is not currently
+ * selected by update_count, then published by incrementing update_count. The
+ * selected slot is not modified here, so a reader that interrupts this
+ * function still finds a complete snapshot.
+ */
+void freq_change_cb(unsigned int new_freq)
+{
+       struct time_struct *this_cpu_time =
+               &per_cpu(cpu_time, smp_processor_id());
+       struct time_info *write_time, *current_time;
+
+       write_time =
+               &this_cpu_time->time_sel[(this_cpu_time->update_count + 1) & 1];
+       current_time =
+               &this_cpu_time->time_sel[this_cpu_time->update_count & 1];
+       write_time->tsc = get_cycles();
+       write_time->freq = new_freq;
+       /* We cumulate the division imprecision. This is the downside of using
+        * the TSC with variable frequency as a time base.
+        * NOTE(review): the unit of freq is not specified; cycles / freq only
+        * yields ns if freq is in cycles per ns -- confirm and scale.
+        * NOTE(review): current_time->freq must be non-zero here; confirm
+        * that init sets it before the first frequency change. */
+       write_time->walltime =
+               current_time->walltime +
+                       (write_time->tsc - current_time->tsc) /
+                       current_time->freq;
+       /* The snapshot must be written before the counter that selects it. */
+       wmb();
+       this_cpu_time->update_count++;
+}
+
+
+/* Init cpu freq */
+/*
+ * Clears the time base of the CPU this runs on. Filling in the first
+ * snapshot is still to be written (see the TODO comments in the body).
+ */
+void init_cpu_freq(void)
+{
+       struct time_struct *this_cpu_time =
+               &per_cpu(cpu_time, smp_processor_id());
+       struct time_info *current_time;
+
+       /* Zero the whole structure: both snapshots and update_count. */
+       memset(this_cpu_time, 0, sizeof(*this_cpu_time));
+       current_time = &this_cpu_time->time_sel[this_cpu_time->update_count & 1];
+       /* TODO: Init current time */
+       /* TODO: Get frequency */
+       /* TODO: Reset cpus to 0 ns, 0 tsc, start their tsc. */
+}
+
+
+/* After a CPU comes back from hlt */
+/* The trick is to sync all the other CPUs on the first CPU up when they come
+ * up. If all CPUs are down, then there is no need to increment the walltime :
+ * let's simply define the useful walltime on a machine as the time elapsed
+ * while there is a CPU running. If we want, when no cpu is active, we can use
+ * a lower resolution clock to somehow keep track of walltime. */
+
+/* Not implemented yet: takes no argument and returns nothing. */
+void wake_from_hlt(void)
+{
+       /* TODO */
+}
+
+
+
+/* Read time from anywhere in the kernel. Return time in walltime. (ns) */
+/* If the update_count changes while we read the context, it may be invalid.
+ * This would happen if we are scheduled out for a period of time long enough to
+ * permit 2 frequency changes. We simply start the loop again if it happens.
+ * We detect it by comparing the update_count running counter.
+ *
+ * Returns the walltime of the selected snapshot plus the cycles elapsed since
+ * that snapshot divided by its frequency.
+ *
+ * NOTE(review): the per-cpu pointer is taken once, before the loop. If the
+ * caller can migrate to another CPU in between, get_cycles() and the snapshot
+ * come from different CPUs -- confirm this is acceptable. */
+u64 read_time(void)
+{
+       u64 walltime;
+       long update_count;
+       struct time_struct *this_cpu_time =
+               &per_cpu(cpu_time, smp_processor_id());
+       struct time_info *current_time;
+
+       do {
+               update_count = this_cpu_time->update_count;
+               /* Pairs with the wmb() in freq_change_cb(): read the counter
+                * before the snapshot it selects. */
+               rmb();
+               current_time = &this_cpu_time->time_sel[update_count & 1];
+               walltime = current_time->walltime +
+                               (get_cycles() - current_time->tsc) /
+                               current_time->freq;
+               /* Finish reading the snapshot before re-checking the
+                * counter. */
+               rmb();
+       } while (this_cpu_time->update_count != update_count);
+       return walltime;
+}
+
+/* Userspace */
+/* Export all this data to user space through the vsyscall page. Use a function
+ * like read_time to read the walltime. This function can be implemented as-is
+ * because it doesn't need to disable preemption. */
+
+
+
author	compudj <compudj@04897980-b3bd-0310-b5e0-8ef037075253>
	Sat, 24 Feb 2007 06:28:35 +0000 (06:28 +0000)
committer	compudj <compudj@04897980-b3bd-0310-b5e0-8ef037075253>
	Sat, 24 Feb 2007 06:28:35 +0000 (06:28 +0000)