From c7b68d19da9138a52e940a2265e35d2e31cdff35 Mon Sep 17 00:00:00 2001
From: compudj <compudj@04897980-b3bd-0310-b5e0-8ef037075253>
Date: Fri, 29 Sep 2006 20:55:18 +0000
Subject: [PATCH] start table

git-svn-id: http://ltt.polymtl.ca/svn@2144 04897980-b3bd-0310-b5e0-8ef037075253
---
 tests/markers/Makefile                   |   1 +
 tests/markers/markers-microbench-0.2.txt |  62 +++++++++++++++++------
 tests/markers/markers-result.gnumeric    | Bin 0 -> 2137 bytes
 3 files changed, 48 insertions(+), 15 deletions(-)
 create mode 100644 tests/markers/markers-result.gnumeric

diff --git a/tests/markers/Makefile b/tests/markers/Makefile
index 3fef350c..47003986 100644
--- a/tests/markers/Makefile
+++ b/tests/markers/Makefile
@@ -9,6 +9,7 @@ endif
 	obj-m += probe-string.o
 	obj-m += test-micro-loop-probe.o
 	obj-m += test-asm.o
+	obj-m += test-kprobes.o
 else
 	KERNELDIR ?= /lib/modules/$(shell uname -r)/build
 	PWD := $(shell pwd)
diff --git a/tests/markers/markers-microbench-0.2.txt b/tests/markers/markers-microbench-0.2.txt
index b97b059d..296a1f50 100644
--- a/tests/markers/markers-microbench-0.2.txt
+++ b/tests/markers/markers-microbench-0.2.txt
@@ -1,6 +1,22 @@
 
 
-* Microbenchmarks
+Hi,
+
+Following the huge discussion thread about tracing/static vs dynamic
+instrumentation/markers, a consensus seems to emerge about the need for a
+marker system in the Linux kernel. The main issues this mechanism addresses are:
+
+- Identify code important to runtime data collection/analysis tools in tree so
+  that it follows the code changes naturally.
+- Be visually appealing to kernel developers.
+- Have a very low impact on the system performance.
+- Integrate in the standard kernel infrastructure : use C and loadable modules.
+
+The time has come for some performance measurements of the Linux Kernel Markers,
+which follow.
+
+
+* Micro-benchmarks
 
 Use timestamp counter to calculate the time spent, with interrupts disabled.
 Machine : Pentium 4 3GHz, 1GB ram
@@ -106,13 +122,19 @@ additional cycles per loop to get expected variable arguments on x86 :
 
 - Execute a loop with marker enabled, with var args probe, format string
   Data is copied by the probe. This is a 6 bytes string to decode.
-processing.
 NR_LOOPS : 100000
 time delta (cycles): 9622117
 cycles per loop : 96.22
 additional cycles per loop to dynamically parse arguments with a 6 bytes format
-string :
-96.22-55.74=40.48
+string : 96.22-55.74=40.48
+
+- Execute a loop with marker enabled, with var args probe expecting arguments.
+  Data is copied by the probe. With preemption disabling. An empty "kprobe" is
+  connected to the probe.
+NR_LOOPS : 100000
+time delta (cycles): 423397455
+cycles per loop : 4233.97
+additional cycles per loop to execute the kprobe : 4233.97-55.74=4178.23
 
 
 * Assembly code
@@ -272,7 +294,7 @@ Length of the marker name + 7 bytes (__mark_)
 12 bytes (3 pointers)
 
 
-* Macrobenchmarks
+* Macro-benchmarks
 
 Compiling a 2.6.17 kernel on a Pentium 4 3GHz, 1GB ram, cold cache.
 Running a 2.6.17 vanilla kernel :
@@ -285,6 +307,8 @@ real    8m1.635s
 user    7m34.552s
 sys     0m36.298s
 
+--> 0.98 % speedup with markers
+
 Ping flood on loopback interface :
 Running a 2.6.17 vanilla kernel :
 136596 packets transmitted, 136596 packets received, 0% packet loss
@@ -306,6 +330,7 @@ sys     0m8.353s
 
 12596 packets transmitted/s
 
+--> 0.03 % slowdown with markers
 
 
 Conclusion
@@ -329,15 +354,22 @@ ability to insert a breakpoint at any location without any impact on the code
 when inactive. This breakpoint based approach is very useful to instrument core
 kernel code that has not been previously marked without need to recompile and
 reboot. We can therefore compare the case "without markers" to the null impact
-of the int3 breakpoint based approach when inactive.
-
-
-
-
-
-
-
-
-
+of an inactive int3 breakpoint.
+
+However, the performance impact of using a kprobe is non-negligible when
+activated. Assuming that kprobes would have a mechanism to get the variables
+from the caller's stack, it would perform the same task in at least 4178.23
+cycles vs 55.74 for a marker and a probe (ratio : 75). While kprobes are very
+useful for the reason explained earlier, the high event rate paths in the kernel
+would clearly benefit from a marker mechanism when they are probed.
+
+Code size and memory footprints are smaller with the optimized version : 6
+bytes of code in the likely path compared to 11 bytes. The memory footprint of
+the optimized approach saves 4 bytes of data memory that would otherwise have to
+stay in cache.
+
+On the macro-benchmark side, no significant difference in performance has been
+found between the vanilla kernel and a kernel "marked" with the standard LTTng
+instrumentation.
 
 
diff --git a/tests/markers/markers-result.gnumeric b/tests/markers/markers-result.gnumeric
new file mode 100644
index 0000000000000000000000000000000000000000..9f3c78e87e2f0f0fb1de7bc4aff23d2aecd331d6
GIT binary patch
literal 2137
zcmV-f2&VTRiwFQCh#f@$1MOUGZ`(K${@!0f=)-;5IFjtRPOkAHX_Cf3k_}=f-QB~%
zpe4rUMIr@?itYXNJ0xXUFLq+>b%DC87AVN!aQJZaa5$#Ke)@S42I!TDm~wV%IW5~l
zgn8VjY<g-vj;;=R*3bP9pQbE4`O3x1g!2~!U08eqqEl-oWpr|QIG@j3Q<j88P_M<s
z^zhZOTfX$IRCf{6g6_PnwOreF4u8G98G17k;seTJiJ3>BS4>Z0Exh3#mKxjEp?a-F
z>|6a02x*v4W-{n9;@%9VWT9L9KQ2zKE|e%!rM88xk|0p??Y7$1Vb=FdN<k-yB(V~e
zg+`W2<uW4u&ZonyoNr_}B>k1Yo+z20oWwIeAJ4d;e{d#oFpfRJgJ6P%F`;|pO}yeD
zA^p+A<E2qBb%W|~1FUZi6*wWOw+BEOmqdxi5>Mjq52Me=IFWqp@h}RAqVo5Lk;dal
zfEztY{5R2Eo!ty~f>#p7*$NvbVTi>tH-&*DVHF^bqJWyEza7y*4EEWyzlsMLvQ<M4
z-g=m~MolCv(f0?u#d1bTf-XqR_!0unE+d;-&H=_DGeYDYj5T0BS>zCA4Sij5((ajb
ztERiNW_yEXNtB_|{l5?%5-FDGEA=G=*X|Jn)oVhhGYJ(tA4;`_xI8wxpm7x7<rNoU
z65#mMf(_@I`s8;acvkr$sGn6n^Wd6bpNLHIz~?zhYVnsu29%LlNBp!hT!s@K#OEa7
z^9}L`gB#>eG^P_8P`Ny{zTDl8hu4>vqw&*VI5;2N3`W11;?1R1AVlTi+?6SFix&XD
zpFWYaR#Jf3^xaOTp{jX}-#HKaseN6}H{Ju+<bXYcH}z6kHD=bu6gJhhbQK?Xgzh<X
zRo-&JhxdX!KQao=@Ivzp3;Gi-B@cJ&vYJR~WJ3YiPY6;G>C}pd@CfXLQ`qj;WiW2r
z1#@~`sa7J8jydX~mlbC3DNKP;+gr&iVPpC{<}6r_W4R1Syj?lu3=#_g3t_vCFYvS6
z-Yltna|kYg+<kv6XMigWM@ClGqsndi$wypBfvH?+uUm?J%*sJG<QX2F%7}bF01*Ge
zVf63+jLy#gYoT;qRA60kE;n?&hv$Y&|NFBqmzxY2P}|sQxS`!%CX5n3kg3Y&AU6~e
z){kEFy7Hh_b;myLs+`r=P2ILG>z3}(QTxcMkXhSA*JlBpGPC&8qHCeA0!JgVka~>a
z4Dh~;_!Twv2QJf8R5yYfbxB1?W;7sKeIdR!3ULa=S_-jFX`aAe3!P){<<z=@KPTBg
zu$Zs}&oY>TN>5YOEA1x;NQ4oAgThR*C&DB$tI$!#d0^t^05VMK>Ea{66DsH#*swdj
zkP*-~na+&F6ExEB`$NpqjZE3yFr0SXd5MX+_7TJtty3a(PUYN$l6SNe#op7=BzsTk
zg5oJ>IC$3>c|CYnH{BXHJ0aIt4u&}v@5Jv;p5OMG+|JRO-0o3B?S|TWN9{uBZo6x4
zw;O6V)JFSEZFfu5c5@j?!|R6E`^M|`7GCEfcH3!)-4J`<h+PQSuCp;>H_UFBz30q!
zjyE#fwSNF++b!E}XkB9BKZDjEH?sP;A$3FQy&|>K-I8DDkFq=VadRfpkbCdQZErit
zZZhnK*!xWE?zVR{>hQXG1KH3!Q`$><7vi>i)ZP3JvdKyscJCv*i^n6IU)4ChhS?3X
zze8r{8nUmYG!URbj4sGCP69cke~^3isa_{f<CU8F1Pk5q^dTz>fEqHN?44;goAS9T
zmp=l__jgwPUDs`GH-WCP@D(K0<1Ksc%AVsot)rUujAN7+3T}bbTy=}%wmu$}Jp-B<
zlFk~5(p)34b4Z~4nI~!mL$oO2P{S5*3`syd6$E1RIT0}zs~<w9W_H?68B^9Pe_~MB
zoI4wTRe`t!Xgz+BH6#eiU)?DgT3OWQzynX}W5tpFbh8U99-Qz(#DV%^5(S(`srRAh
zR%32ztXnbmdFiR|AtNGowY38Fv102hl{2156l#ER4O~TA9k2x?+@Zgn_U{$?Lsae8
zm`JZOkzXQ7Lm;gG6Jnelc$_i)f&O2ZS+6p)P7P?s6=-^2q2B)w?6?BVZFN7^z~(p=
za3Kl3Xo>hUQlEyR$x@O!kR10L($i^wLw=TwX@a`Py_VChgQ)X&60KxCgXp&FK&(ca
zZ$`M1Z4Iz(+by?O2Vixvo(qCsd_HH2cb?`;9fQ>+b{R&pc4T#`>uI+TwgTeXe<wRi
z);p_e@x_N*4{<dBT+kR#Aa9+bNDu;k%<d5FhSI4z(Qb8Wf!Yv5FepWWPsmRQxZ)#7
zB9ai&{}??BNNWWod9keNPq!LAk6Yb3Yt5|&sD};cAm8v_5M@Zth?zwlnANytw|d8Q
zP*#JMSs$rG$s#hGe8oaR-{8cxq?DGa>pM4isX{XR2r+jX&hlxc*5bh{ME`pjE+ApL
z!K)u|QKHgGl@l&TG$d($xERRva?%}VcVw!SftH>To+Xm&t1#*BLC8B-`e!Vj6Y*dg
z2#cwtuT(CL;06|=j7|BT2z8Gltq499*i=&BxocdhAglkNcd*>I?_eD`?_I*mX*8Fx
znoC&CC9J>nYOi?#(j24}lzy0lv|>u|#<@}{ci4FFV;2s7>|M@%wyijOz259Pr#>Z^
zO(OJzBtq>b5o!{lyiSt{HHlD@2vt>@M5x@*Btrj~L?~BX|L;tXg9|J%GR`9Y2v0Ht
z#$7n@AXC!M;})|72M+{bo7H{EF!(|NPeYTF&0Gs&vz?+$FyaA}m`BXfuXeG4wfgU=
P`XBxTpxh}fp-un*UVt2`

literal 0
HcmV?d00001

-- 
2.34.1