You can subscribe to this list here.
| 2009 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(32) |
Jun
(66) |
Jul
(102) |
Aug
(78) |
Sep
(106) |
Oct
(137) |
Nov
(147) |
Dec
(147) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2010 |
Jan
(71) |
Feb
(139) |
Mar
(86) |
Apr
(76) |
May
(57) |
Jun
(10) |
Jul
(12) |
Aug
(6) |
Sep
(8) |
Oct
(12) |
Nov
(12) |
Dec
(18) |
| 2011 |
Jan
(16) |
Feb
(19) |
Mar
(3) |
Apr
(1) |
May
(16) |
Jun
(17) |
Jul
(74) |
Aug
(22) |
Sep
(18) |
Oct
(24) |
Nov
(21) |
Dec
(30) |
| 2012 |
Jan
(31) |
Feb
(16) |
Mar
(22) |
Apr
(25) |
May
(18) |
Jun
(13) |
Jul
(83) |
Aug
(49) |
Sep
(20) |
Oct
(60) |
Nov
(35) |
Dec
(28) |
| 2013 |
Jan
(39) |
Feb
(61) |
Mar
(35) |
Apr
(21) |
May
(45) |
Jun
(56) |
Jul
(20) |
Aug
(9) |
Sep
(10) |
Oct
(31) |
Nov
(8) |
Dec
(4) |
| 2014 |
Jan
(6) |
Feb
(7) |
Mar
(7) |
Apr
(6) |
May
(4) |
Jun
(8) |
Jul
(5) |
Aug
(2) |
Sep
(4) |
Oct
(4) |
Nov
(11) |
Dec
(5) |
| 2015 |
Jan
(4) |
Feb
(4) |
Mar
(3) |
Apr
(4) |
May
(9) |
Jun
(4) |
Jul
(15) |
Aug
(8) |
Sep
(16) |
Oct
(18) |
Nov
(15) |
Dec
(7) |
| 2016 |
Jan
(20) |
Feb
(9) |
Mar
(15) |
Apr
(24) |
May
(16) |
Jun
(28) |
Jul
(22) |
Aug
(23) |
Sep
(18) |
Oct
(30) |
Nov
(40) |
Dec
(9) |
| 2017 |
Jan
(1) |
Feb
(8) |
Mar
(37) |
Apr
(26) |
May
(25) |
Jun
(46) |
Jul
(24) |
Aug
(9) |
Sep
|
Oct
|
Nov
|
Dec
|
|
From: tip-bot f. M. H. <mhi...@re...> - 2010-01-13 10:35:36
|
Commit-ID: aa5add93e92019018e905146f8c3d3f8e3c08300 Gitweb: http://git.kernel.org/tip/aa5add93e92019018e905146f8c3d3f8e3c08300 Author: Masami Hiramatsu <mhi...@re...> AuthorDate: Tue, 5 Jan 2010 17:46:56 -0500 Committer: Ingo Molnar <mi...@el...> CommitDate: Wed, 13 Jan 2010 10:09:12 +0100 x86/ptrace: Remove unused regs_get_argument_nth API Because of dropping function argument syntax from kprobe-tracer, we don't need this API anymore. Signed-off-by: Masami Hiramatsu <mhi...@re...> Cc: Frederic Weisbecker <fwe...@gm...> Cc: Arnaldo Carvalho de Melo <ac...@re...> Cc: systemtap <sys...@so...> Cc: DLE <dle...@li...> Cc: Frederic Weisbecker <fwe...@gm...> Cc: Roland McGrath <ro...@re...> Cc: Oleg Nesterov <ol...@re...> Cc: Mahesh Salgaonkar <ma...@li...> Cc: Benjamin Herrenschmidt <be...@ke...> Cc: Michael Neuling <mi...@ne...> Cc: Steven Rostedt <ro...@go...> Cc: lin...@oz... LKML-Reference: <201...@dh...> Signed-off-by: Ingo Molnar <mi...@el...> --- arch/x86/include/asm/ptrace.h | 4 ---- arch/x86/kernel/ptrace.c | 24 ------------------------ 2 files changed, 0 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 9d369f6..2010280 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -274,10 +274,6 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, return 0; } -/* Get Nth argument at function call */ -extern unsigned long regs_get_argument_nth(struct pt_regs *regs, - unsigned int n); - /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 017d937..73554a3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -140,30 +140,6 @@ static const int arg_offs_table[] = { #endif }; -/** - * regs_get_argument_nth() - get Nth argument at function call - * @regs: pt_regs which contains registers at function entry. - * @n: argument number. 
- * - * regs_get_argument_nth() returns @n th argument of a function call. - * Since usually the kernel stack will be changed right after function entry, - * you must use this at function entry. If the @n th entry is NOT in the - * kernel stack or pt_regs, this returns 0. - */ -unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) -{ - if (n < ARRAY_SIZE(arg_offs_table)) - return *(unsigned long *)((char *)regs + arg_offs_table[n]); - else { - /* - * The typical case: arg n is on the stack. - * (Note: stack[0] = return address, so skip it) - */ - n -= ARRAY_SIZE(arg_offs_table); - return regs_get_kernel_stack_nth(regs, 1 + n); - } -} - /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. |
|
From: tip-bot f. M. H. <mhi...@re...> - 2010-01-13 10:35:24
|
Commit-ID: 72041334b8c75ae7e1da2f17ba2b7afee8f2abd7 Gitweb: http://git.kernel.org/tip/72041334b8c75ae7e1da2f17ba2b7afee8f2abd7 Author: Masami Hiramatsu <mhi...@re...> AuthorDate: Tue, 5 Jan 2010 17:47:10 -0500 Committer: Ingo Molnar <mi...@el...> CommitDate: Wed, 13 Jan 2010 10:09:13 +0100 perf probe: Show probe list in pager Show probe list in pager, because the list can be longer than a page. Signed-off-by: Masami Hiramatsu <mhi...@re...> Cc: Frederic Weisbecker <fwe...@gm...> Cc: Arnaldo Carvalho de Melo <ac...@re...> Cc: systemtap <sys...@so...> Cc: DLE <dle...@li...> Cc: Frederic Weisbecker <fwe...@gm...> Cc: Paul Mackerras <pa...@sa...> Cc: Arnaldo Carvalho de Melo <ac...@re...> Cc: Peter Zijlstra <pe...@in...> Cc: Mike Galbraith <ef...@gm...> LKML-Reference: <201...@dh...> Signed-off-by: Ingo Molnar <mi...@el...> --- tools/perf/util/probe-event.c | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8e532d9..a22141a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -37,6 +37,7 @@ #include "string.h" #include "strlist.h" #include "debug.h" +#include "cache.h" #include "parse-events.h" /* For debugfs_path */ #include "probe-event.h" @@ -455,6 +456,8 @@ void show_perf_probe_events(void) struct strlist *rawlist; struct str_node *ent; + setup_pager(); + fd = open_kprobe_events(O_RDONLY, 0); rawlist = get_trace_kprobe_event_rawlist(fd); close(fd); |
|
From: tip-bot f. M. H. <mhi...@re...> - 2010-01-13 10:35:15
|
Commit-ID: 14640106f243a3b29944d7198569090fa6546f2d Gitweb: http://git.kernel.org/tip/14640106f243a3b29944d7198569090fa6546f2d Author: Masami Hiramatsu <mhi...@re...> AuthorDate: Tue, 5 Jan 2010 17:46:48 -0500 Committer: Ingo Molnar <mi...@el...> CommitDate: Wed, 13 Jan 2010 10:09:12 +0100 tracing/kprobe: Drop function argument access syntax Drop function argument access syntax, because the function arguments depend on not only architecture but also compile-options and function API. And now, we have perf-probe for finding register/memory assigned to each argument. Signed-off-by: Masami Hiramatsu <mhi...@re...> Cc: Frederic Weisbecker <fwe...@gm...> Cc: Arnaldo Carvalho de Melo <ac...@re...> Cc: systemtap <sys...@so...> Cc: DLE <dle...@li...> Cc: Frederic Weisbecker <fwe...@gm...> Cc: Steven Rostedt <ro...@go...> Cc: Roland McGrath <ro...@re...> Cc: Oleg Nesterov <ol...@re...> Cc: Mahesh Salgaonkar <ma...@li...> Cc: Benjamin Herrenschmidt <be...@ke...> Cc: Michael Neuling <mi...@ne...> Cc: lin...@oz... LKML-Reference: <201...@dh...> Signed-off-by: Ingo Molnar <mi...@el...> --- Documentation/trace/kprobetrace.txt | 21 ++++++++++----------- kernel/trace/trace_kprobe.c | 18 +----------------- 2 files changed, 11 insertions(+), 28 deletions(-) diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index c3eff6f..f30978e 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -37,15 +37,12 @@ Synopsis of kprobe_events @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) $stackN : Fetch Nth entry of stack (N >= 0) $stack : Fetch stack address. - $argN : Fetch function argument. (N >= 0)(*) - $retval : Fetch return value.(**) - +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) + $retval : Fetch return value.(*) + +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) NAME=FETCHARG: Set NAME as the argument name of FETCHARG. 
- (*) aN may not correct on asmlinkaged functions and at the middle of - function body. - (**) only for return probe. - (***) this is useful for fetching a field of data structures. + (*) only for return probe. + (**) this is useful for fetching a field of data structures. Per-Probe Event Filtering @@ -82,11 +79,14 @@ Usage examples To add a probe as a new event, write a new definition to kprobe_events as below. - echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events + echo p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack) > /sys/kernel/debug/tracing/kprobe_events This sets a kprobe on the top of do_sys_open() function with recording -1st to 4th arguments as "myprobe" event. As this example shows, users can -choose more familiar names for each arguments. +1st to 4th arguments as "myprobe" event. Note, which register/stack entry is +assigned to each function argument depends on arch-specific ABI. If you unsure +the ABI, please try to use probe subcommand of perf-tools (you can find it +under tools/perf/). +As this example shows, users can choose more familiar names for each arguments. echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events @@ -147,4 +147,3 @@ events, you need to enable it. returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel returns from do_sys_open to sys_open+0x1b). 
- diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 47f54ab..7ac728d 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) return retval; } -static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num) -{ - return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num)); -} - static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, void *dummy) { @@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) { int ret = -EINVAL; - if (ff->func == fetch_argument) - ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data); - else if (ff->func == fetch_register) { + if (ff->func == fetch_register) { const char *name; name = regs_query_register_name((unsigned int)((long)ff->data)); ret = snprintf(buf, n, "%%%s", name); @@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) } } else ret = -EINVAL; - } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) { - ret = strict_strtoul(arg + 3, 10, ¶m); - if (ret || param > PARAM_MAX_ARGS) - ret = -EINVAL; - else { - ff->func = fetch_argument; - ff->data = (void *)param; - } } else ret = -EINVAL; return ret; @@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv) * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] * Fetch args: - * $argN : fetch Nth of function argument. (N:0-) * $retval : fetch return value * $stack : fetch stack address * $stackN : fetch Nth of stack (N:0-) |
|
From: tip-bot f. M. H. <mhi...@re...> - 2010-01-13 10:35:01
|
Commit-ID: ec3a9039601af210fca4650d229621fe5a21df0b Gitweb: http://git.kernel.org/tip/ec3a9039601af210fca4650d229621fe5a21df0b Author: Masami Hiramatsu <mhi...@re...> AuthorDate: Tue, 5 Jan 2010 17:46:41 -0500 Committer: Ingo Molnar <mi...@el...> CommitDate: Wed, 13 Jan 2010 10:09:11 +0100 tracing/kprobe: Update example output in documentation Update example output in documentation according to current implementation. Signed-off-by: Masami Hiramatsu <mhi...@re...> Cc: Frederic Weisbecker <fwe...@gm...> Cc: Arnaldo Carvalho de Melo <ac...@re...> Cc: systemtap <sys...@so...> Cc: DLE <dle...@li...> Cc: Steven Rostedt <ro...@go...> Cc: Frederic Weisbecker <fwe...@gm...> LKML-Reference: <201...@dh...> Signed-off-by: Ingo Molnar <mi...@el...> --- Documentation/trace/kprobetrace.txt | 33 +++++++++++++++++---------------- 1 files changed, 17 insertions(+), 16 deletions(-) diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 47aabee..c3eff6f 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -97,23 +97,24 @@ recording return value as "myretprobe" event. 
cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format name: myprobe -ID: 75 +ID: 780 format: - field:unsigned short common_type; offset:0; size:2; - field:unsigned char common_flags; offset:2; size:1; - field:unsigned char common_preempt_count; offset:3; size:1; - field:int common_pid; offset:4; size:4; - field:int common_tgid; offset:8; size:4; - - field: unsigned long ip; offset:16;tsize:8; - field: int nargs; offset:24;tsize:4; - field: unsigned long dfd; offset:32;tsize:8; - field: unsigned long filename; offset:40;tsize:8; - field: unsigned long flags; offset:48;tsize:8; - field: unsigned long mode; offset:56;tsize:8; - -print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode - + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1;signed:0; + field:int common_pid; offset:4; size:4; signed:1; + field:int common_lock_depth; offset:8; size:4; signed:1; + + field:unsigned long __probe_ip; offset:12; size:4; signed:0; + field:int __probe_nargs; offset:16; size:4; signed:1; + field:unsigned long dfd; offset:20; size:4; signed:0; + field:unsigned long filename; offset:24; size:4; signed:0; + field:unsigned long flags; offset:28; size:4; signed:0; + field:unsigned long mode; offset:32; size:4; signed:0; + + +print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->__probe_ip, +REC->dfd, REC->filename, REC->flags, REC->mode You can see that the event has 4 arguments as in the expressions you specified. |
|
From: Steven R. <ro...@go...> - 2010-01-06 18:54:10
|
On Tue, 2010-01-05 at 17:46 -0500, Masami Hiramatsu wrote: > Update example output in documentation accroding to current > implementation. > > Signed-off-by: Masami Hiramatsu <mhi...@re...> > Cc: Ingo Molnar <mi...@el...> > Cc: Steven Rostedt <ro...@go...> Acked-by: Steven Rostedt <ro...@go...> -- Steve > Cc: Frederic Weisbecker <fwe...@gm...> > --- > > Documentation/trace/kprobetrace.txt | 33 +++++++++++++++++---------------- > 1 files changed, 17 insertions(+), 16 deletions(-) > > diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt > index 47aabee..c3eff6f 100644 > --- a/Documentation/trace/kprobetrace.txt > +++ b/Documentation/trace/kprobetrace.txt > @@ -97,23 +97,24 @@ recording return value as "myretprobe" event. > > cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format > name: myprobe > -ID: 75 > +ID: 780 > format: > - field:unsigned short common_type; offset:0; size:2; > - field:unsigned char common_flags; offset:2; size:1; > - field:unsigned char common_preempt_count; offset:3; size:1; > - field:int common_pid; offset:4; size:4; > - field:int common_tgid; offset:8; size:4; > - > - field: unsigned long ip; offset:16;tsize:8; > - field: int nargs; offset:24;tsize:4; > - field: unsigned long dfd; offset:32;tsize:8; > - field: unsigned long filename; offset:40;tsize:8; > - field: unsigned long flags; offset:48;tsize:8; > - field: unsigned long mode; offset:56;tsize:8; > - > -print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode > - > + field:unsigned short common_type; offset:0; size:2; signed:0; > + field:unsigned char common_flags; offset:2; size:1; signed:0; > + field:unsigned char common_preempt_count; offset:3; size:1;signed:0; > + field:int common_pid; offset:4; size:4; signed:1; > + field:int common_lock_depth; offset:8; size:4; signed:1; > + > + field:unsigned long __probe_ip; offset:12; size:4; signed:0; > + field:int __probe_nargs; offset:16; size:4; 
signed:1; > + field:unsigned long dfd; offset:20; size:4; signed:0; > + field:unsigned long filename; offset:24; size:4; signed:0; > + field:unsigned long flags; offset:28; size:4; signed:0; > + field:unsigned long mode; offset:32; size:4; signed:0; > + > + > +print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->__probe_ip, > +REC->dfd, REC->filename, REC->flags, REC->mode > > You can see that the event has 4 arguments as in the expressions you specified. > > > |
|
From: Masami H. <mhi...@re...> - 2010-01-06 14:39:28
|
Add --line option to support showing probable source-code lines.
perf probe --line SRC:LN[-LN|+NUM]
or
perf probe --line FUNC[:LN[-LN|+NUM]]
This option shows source code with a line number on each line that can
be probed. Lines without a line number (shown in blue) cannot be
probed, because debuginfo doesn't have any information about those
lines.
The argument specifies the range of lines: "source.c:100-120" shows
lines from the 100th to the 120th in the source.c file, and "func:10+20"
shows 20 lines from the 10th line of the func function.
e.g.
# ./perf probe --line kernel/sched.c:1080
<kernel/sched.c:1080>
*
* called with rq->lock held and irqs disabled
*/
static void hrtick_start(struct rq *rq, u64 delay)
{
struct hrtimer *timer = &rq->hrtick_timer;
1086 ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
hrtimer_set_expires(timer, time);
1090 if (rq == this_rq()) {
1091 hrtimer_restart(timer);
1092 } else if (!rq->hrtick_csd_pending) {
1093 __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd,
1094 rq->hrtick_csd_pending = 1;
If you specify a function name, this shows function-relative line numbers.
# ./perf probe --line schedule
<schedule:0>
asmlinkage void __sched schedule(void)
1 {
struct task_struct *prev, *next;
unsigned long *switch_count;
struct rq *rq;
int cpu;
need_resched:
preempt_disable();
9 cpu = smp_processor_id();
10 rq = cpu_rq(cpu);
11 rcu_sched_qs(cpu);
12 prev = rq->curr;
13 switch_count = &prev->nivcsw;
Signed-off-by: Masami Hiramatsu <mhi...@re...>
Cc: Ingo Molnar <mi...@el...>
Cc: Frederic Weisbecker <fwe...@gm...>
Cc: Paul Mackerras <pa...@sa...>
Cc: Arnaldo Carvalho de Melo <ac...@re...>
Cc: Peter Zijlstra <pe...@in...>
Cc: Mike Galbraith <ef...@gm...>
---
tools/perf/Documentation/perf-probe.txt | 20 +++
tools/perf/builtin-probe.c | 76 ++++++++++--
tools/perf/util/probe-event.c | 100 ++++++++++++++++
tools/perf/util/probe-event.h | 2
tools/perf/util/probe-finder.c | 191 ++++++++++++++++++++++++++++++-
tools/perf/util/probe-finder.h | 31 +++++
6 files changed, 402 insertions(+), 18 deletions(-)
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 250e391..2de3407 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -15,6 +15,8 @@ or
'perf probe' [options] --del='[GROUP:]EVENT' [...]
or
'perf probe' --list
+or
+'perf probe' --line='FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]'
DESCRIPTION
-----------
@@ -45,6 +47,11 @@ OPTIONS
--list::
List up current probe events.
+-L::
+--line=::
+ Show source code lines which can be probed. This needs an argument
+ which specifies a range of the source code.
+
PROBE SYNTAX
------------
Probe points are defined by following syntax.
@@ -56,6 +63,19 @@ Probe points are defined by following syntax.
It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number.
'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc).
+LINE SYNTAX
+-----------
+Line range is described by the following syntax.
+
+ "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]"
+
+FUNC specifies the name of the function whose lines are shown. 'RLN' is the
+start line number relative to the function entry line, and 'RLN2' is the end
+line number. As in the probe syntax, 'SRC' means the source file path, 'ALN' is
+the start line number, and 'ALN2' is the end line number in the file. It is also
+possible to specify how many lines to show by using 'NUM'.
+So, "source.c:100-120" shows lines from the 100th to the 120th in the source.c file. And "func:10+20" shows 20 lines from the 10th line of the func function.
+
SEE ALSO
--------
linkperf:perf-trace[1], linkperf:perf-record[1]
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index ffdd3fe..1d3a99e 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -55,11 +55,13 @@ static struct {
bool need_dwarf;
bool list_events;
bool force_add;
+ bool show_lines;
int nr_probe;
struct probe_point probes[MAX_PROBES];
struct strlist *dellist;
struct perf_session *psession;
struct map *kmap;
+ struct line_range line_range;
} session;
@@ -116,6 +118,15 @@ static int opt_del_probe_event(const struct option *opt __used,
return 0;
}
+static int opt_show_lines(const struct option *opt __used,
+ const char *str, int unset __used)
+{
+ if (str)
+ parse_line_range_desc(str, &session.line_range);
+ INIT_LIST_HEAD(&session.line_range.line_list);
+ session.show_lines = true;
+ return 0;
+}
/* Currently just checking function name from symbol map */
static void evaluate_probe_point(struct probe_point *pp)
{
@@ -144,6 +155,7 @@ static const char * const probe_usage[] = {
"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
"perf probe [<options>] --del '[GROUP:]EVENT' ...",
"perf probe --list",
+ "perf probe --line 'LINEDESC'",
NULL
};
@@ -182,9 +194,32 @@ static const struct option options[] = {
opt_add_probe_event),
OPT_BOOLEAN('f', "force", &session.force_add, "forcibly add events"
" with existing name"),
+#ifndef NO_LIBDWARF
+ OPT_CALLBACK('L', "line", NULL,
+ "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]",
+ "Show source code lines.", opt_show_lines),
+#endif
OPT_END()
};
+/* Initialize symbol maps for vmlinux */
+static void init_vmlinux(void)
+{
+ symbol_conf.sort_by_name = true;
+ if (symbol_conf.vmlinux_name == NULL)
+ symbol_conf.try_vmlinux_path = true;
+ else
+ pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
+ if (symbol__init() < 0)
+ die("Failed to init symbol map.");
+ session.psession = perf_session__new(NULL, O_WRONLY, false);
+ if (session.psession == NULL)
+ die("Failed to init perf_session.");
+ session.kmap = session.psession->vmlinux_maps[MAP__FUNCTION];
+ if (!session.kmap)
+ die("Could not find kernel map.\n");
+}
+
int cmd_probe(int argc, const char **argv, const char *prefix __used)
{
int i, ret;
@@ -203,7 +238,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
parse_probe_event_argv(argc, argv);
}
- if ((!session.nr_probe && !session.dellist && !session.list_events))
+ if ((!session.nr_probe && !session.dellist && !session.list_events &&
+ !session.show_lines))
usage_with_options(probe_usage, options);
if (debugfs_valid_mountpoint(debugfs_path) < 0)
@@ -215,10 +251,34 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
" --add/--del.\n");
usage_with_options(probe_usage, options);
}
+ if (session.show_lines) {
+ pr_warning(" Error: Don't use --list with --line.\n");
+ usage_with_options(probe_usage, options);
+ }
show_perf_probe_events();
return 0;
}
+#ifndef NO_LIBDWARF
+ if (session.show_lines) {
+ if (session.nr_probe != 0 || session.dellist) {
+ pr_warning(" Error: Don't use --line with"
+ " --add/--del.\n");
+ usage_with_options(probe_usage, options);
+ }
+ init_vmlinux();
+ fd = open_vmlinux();
+ if (fd < 0)
+ die("Could not open debuginfo file.");
+ ret = find_line_range(fd, &session.line_range);
+ if (ret <= 0)
+ die("Source line is not found.\n");
+ close(fd);
+ show_line_range(&session.line_range);
+ return 0;
+ }
+#endif
+
if (session.dellist) {
del_trace_kprobe_events(session.dellist);
strlist__delete(session.dellist);
@@ -226,18 +286,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
return 0;
}
- /* Initialize symbol maps for vmlinux */
- symbol_conf.sort_by_name = true;
- if (symbol_conf.vmlinux_name == NULL)
- symbol_conf.try_vmlinux_path = true;
- if (symbol__init() < 0)
- die("Failed to init symbol map.");
- session.psession = perf_session__new(NULL, O_WRONLY, false);
- if (session.psession == NULL)
- die("Failed to init perf_session.");
- session.kmap = session.psession->vmlinux_maps[MAP__FUNCTION];
- if (!session.kmap)
- die("Could not find kernel map.\n");
+ /* Add probes */
+ init_vmlinux();
if (session.need_dwarf)
#ifdef NO_LIBDWARF
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index a22141a..71b0dd5 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -38,6 +38,7 @@
#include "strlist.h"
#include "debug.h"
#include "cache.h"
+#include "color.h"
#include "parse-events.h" /* For debugfs_path */
#include "probe-event.h"
@@ -63,6 +64,42 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
return ret;
}
+void parse_line_range_desc(const char *arg, struct line_range *lr)
+{
+ const char *ptr;
+ char *tmp;
+ /*
+ * <Syntax>
+ * SRC:SLN[+NUM|-ELN]
+ * FUNC[:SLN[+NUM|-ELN]]
+ */
+ ptr = strchr(arg, ':');
+ if (ptr) {
+ lr->start = (unsigned int)strtoul(ptr + 1, &tmp, 0);
+ if (*tmp == '+')
+ lr->end = lr->start + (unsigned int)strtoul(tmp + 1,
+ &tmp, 0);
+ else if (*tmp == '-')
+ lr->end = (unsigned int)strtoul(tmp + 1, &tmp, 0);
+ else
+ lr->end = 0;
+ pr_debug("Line range is %u to %u\n", lr->start, lr->end);
+ if (lr->end && lr->start > lr->end)
+ semantic_error("Start line must be smaller"
+ " than end line.");
+ if (*tmp != '\0')
+ semantic_error("Tailing with invalid character '%d'.",
+ *tmp);
+ tmp = strndup(arg, (ptr - arg));
+ } else
+ tmp = strdup(arg);
+
+ if (strchr(tmp, '.'))
+ lr->file = tmp;
+ else
+ lr->function = tmp;
+}
+
/* Check the name is good for event/group */
static bool check_event_name(const char *name)
{
@@ -678,3 +715,66 @@ void del_trace_kprobe_events(struct strlist *dellist)
close(fd);
}
+#define LINEBUF_SIZE 256
+
+static void show_one_line(FILE *fp, unsigned int l, bool skip, bool show_num)
+{
+ char buf[LINEBUF_SIZE];
+ const char *color = PERF_COLOR_BLUE;
+
+ if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+ goto error;
+ if (!skip) {
+ if (show_num)
+ fprintf(stdout, "%7u %s", l, buf);
+ else
+ color_fprintf(stdout, color, " %s", buf);
+ }
+
+ while (strlen(buf) == LINEBUF_SIZE - 1 &&
+ buf[LINEBUF_SIZE - 2] != '\n') {
+ if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+ goto error;
+ if (!skip) {
+ if (show_num)
+ fprintf(stdout, "%s", buf);
+ else
+ color_fprintf(stdout, color, "%s", buf);
+ }
+ }
+ return;
+error:
+ if (feof(fp))
+ die("Source file is shorter than expected.");
+ else
+ die("File read error: %s", strerror(errno));
+}
+
+void show_line_range(struct line_range *lr)
+{
+ unsigned int l = 1;
+ struct line_node *ln;
+ FILE *fp;
+
+ setup_pager();
+
+ if (lr->function)
+ fprintf(stdout, "<%s:%d>\n", lr->function,
+ lr->start - lr->offset);
+ else
+ fprintf(stdout, "<%s:%d>\n", lr->file, lr->start);
+
+ fp = fopen(lr->path, "r");
+ if (fp == NULL)
+ die("Failed to open %s: %s", lr->path, strerror(errno));
+ /* Skip to starting line number */
+ while (l < lr->start)
+ show_one_line(fp, l++, true, false);
+
+ list_for_each_entry(ln, &lr->line_list, list) {
+ while (ln->line > l)
+ show_one_line(fp, (l++) - lr->offset, false, false);
+ show_one_line(fp, (l++) - lr->offset, false, true);
+ }
+ fclose(fp);
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 7f1d499..711287d 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -5,6 +5,7 @@
#include "probe-finder.h"
#include "strlist.h"
+extern void parse_line_range_desc(const char *arg, struct line_range *lr);
extern void parse_perf_probe_event(const char *str, struct probe_point *pp,
bool *need_dwarf);
extern int synthesize_perf_probe_point(struct probe_point *pp);
@@ -15,6 +16,7 @@ extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes,
bool force_add);
extern void del_trace_kprobe_events(struct strlist *dellist);
extern void show_perf_probe_events(void);
+extern void show_line_range(struct line_range *lr);
/* Maximum index number of event-name postfix */
#define MAX_EVENT_INDEX 1024
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 6402798..1b2124d 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -140,6 +140,31 @@ static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname)
return found;
}
+static int cu_get_filename(Dwarf_Die cu_die, Dwarf_Unsigned fno, char **buf)
+{
+ Dwarf_Signed cnt, i;
+ char **srcs;
+ int ret = 0;
+
+ if (!buf || !fno)
+ return -EINVAL;
+
+ ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error);
+ if (ret == DW_DLV_OK) {
+ if ((Dwarf_Unsigned)cnt > fno - 1) {
+ *buf = strdup(srcs[fno - 1]);
+ ret = 0;
+ pr_debug("found filename: %s\n", *buf);
+ } else
+ ret = -ENOENT;
+ for (i = 0; i < cnt; i++)
+ dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
+ dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST);
+ } else
+ ret = -EINVAL;
+ return ret;
+}
+
/* Compare diename and tname */
static int die_compare_name(Dwarf_Die dw_die, const char *tname)
{
@@ -567,7 +592,7 @@ static int probeaddr_callback(struct die_link *dlink, void *data)
}
/* Find probe point from its line number */
-static void find_by_line(struct probe_finder *pf)
+static void find_probe_point_by_line(struct probe_finder *pf)
{
Dwarf_Signed cnt, i, clm;
Dwarf_Line *lines;
@@ -626,7 +651,7 @@ static int probefunc_callback(struct die_link *dlink, void *data)
pf->fno = die_get_decl_file(dlink->die);
pf->lno = die_get_decl_line(dlink->die)
+ pp->line;
- find_by_line(pf);
+ find_probe_point_by_line(pf);
return 1;
}
if (die_inlined_subprogram(dlink->die)) {
@@ -673,7 +698,7 @@ found:
return 0;
}
-static void find_by_func(struct probe_finder *pf)
+static void find_probe_point_by_func(struct probe_finder *pf)
{
search_die_from_children(pf->cu_die, probefunc_callback, pf);
}
@@ -714,10 +739,10 @@ int find_probepoint(int fd, struct probe_point *pp)
if (ret == DW_DLV_NO_ENTRY)
pf.cu_base = 0;
if (pp->function)
- find_by_func(&pf);
+ find_probe_point_by_func(&pf);
else {
pf.lno = pp->line;
- find_by_line(&pf);
+ find_probe_point_by_line(&pf);
}
}
dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE);
@@ -728,3 +753,159 @@ int find_probepoint(int fd, struct probe_point *pp)
return pp->found;
}
+
+static void line_range_add_line(struct line_range *lr, unsigned int line)
+{
+ struct line_node *ln;
+ struct list_head *p;
+
+ /* Reverse search, because new line will be the last one */
+ list_for_each_entry_reverse(ln, &lr->line_list, list) {
+ if (ln->line < line) {
+ p = &ln->list;
+ goto found;
+ } else if (ln->line == line) /* Already exist */
+ return ;
+ }
+ /* List is empty, or the smallest entry */
+ p = &lr->line_list;
+found:
+ pr_debug("Debug: add a line %u\n", line);
+ ln = zalloc(sizeof(struct line_node));
+ DIE_IF(ln == NULL);
+ ln->line = line;
+ INIT_LIST_HEAD(&ln->list);
+ list_add(&ln->list, p);
+}
+
+/* Find line range from its line number */
+static void find_line_range_by_line(struct line_finder *lf)
+{
+ Dwarf_Signed cnt, i;
+ Dwarf_Line *lines;
+ Dwarf_Unsigned lineno = 0;
+ Dwarf_Unsigned fno;
+ Dwarf_Addr addr;
+ int ret;
+
+ ret = dwarf_srclines(lf->cu_die, &lines, &cnt, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+
+ for (i = 0; i < cnt; i++) {
+ ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ if (fno != lf->fno)
+ continue;
+
+ ret = dwarf_lineno(lines[i], &lineno, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ if (lf->lno_s > lineno || lf->lno_e < lineno)
+ continue;
+
+ /* Filter line in the function address range */
+ if (lf->addr_s && lf->addr_e) {
+ ret = dwarf_lineaddr(lines[i], &addr, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ if (lf->addr_s > addr || lf->addr_e <= addr)
+ continue;
+ }
+ line_range_add_line(lf->lr, (unsigned int)lineno);
+ }
+ dwarf_srclines_dealloc(__dw_debug, lines, cnt);
+ if (!list_empty(&lf->lr->line_list))
+ lf->found = 1;
+}
+
+/* Search function from function name */
+static int linefunc_callback(struct die_link *dlink, void *data)
+{
+ struct line_finder *lf = (struct line_finder *)data;
+ struct line_range *lr = lf->lr;
+ Dwarf_Half tag;
+ int ret;
+
+ ret = dwarf_tag(dlink->die, &tag, &__dw_error);
+ DIE_IF(ret == DW_DLV_ERROR);
+ if (tag == DW_TAG_subprogram &&
+ die_compare_name(dlink->die, lr->function) == 0) {
+ /* Get the address range of this function */
+ ret = dwarf_highpc(dlink->die, &lf->addr_e, &__dw_error);
+ if (ret == DW_DLV_OK)
+ ret = dwarf_lowpc(dlink->die, &lf->addr_s, &__dw_error);
+ DIE_IF(ret == DW_DLV_ERROR);
+ if (ret == DW_DLV_NO_ENTRY) {
+ lf->addr_s = 0;
+ lf->addr_e = 0;
+ }
+
+ lf->fno = die_get_decl_file(dlink->die);
+ lr->offset = die_get_decl_line(dlink->die);;
+ lf->lno_s = lr->offset + lr->start;
+ if (!lr->end)
+ lf->lno_e = (Dwarf_Unsigned)-1;
+ else
+ lf->lno_e = lr->offset + lr->end;
+ lr->start = lf->lno_s;
+ lr->end = lf->lno_e;
+ find_line_range_by_line(lf);
+ /* If we find a target function, this should be end. */
+ lf->found = 1;
+ return 1;
+ }
+ return 0;
+}
+
+static void find_line_range_by_func(struct line_finder *lf)
+{
+ search_die_from_children(lf->cu_die, linefunc_callback, lf);
+}
+
+int find_line_range(int fd, struct line_range *lr)
+{
+ Dwarf_Half addr_size = 0;
+ Dwarf_Unsigned next_cuh = 0;
+ int ret;
+ struct line_finder lf = {.lr = lr};
+
+ ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error);
+ if (ret != DW_DLV_OK)
+ return -ENOENT;
+
+ while (!lf.found) {
+ /* Search CU (Compilation Unit) */
+ ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL,
+ &addr_size, &next_cuh, &__dw_error);
+ DIE_IF(ret == DW_DLV_ERROR);
+ if (ret == DW_DLV_NO_ENTRY)
+ break;
+
+ /* Get the DIE(Debugging Information Entry) of this CU */
+ ret = dwarf_siblingof(__dw_debug, 0, &lf.cu_die, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+
+ /* Check if target file is included. */
+ if (lr->file)
+ lf.fno = cu_find_fileno(lf.cu_die, lr->file);
+
+ if (!lr->file || lf.fno) {
+ if (lr->function)
+ find_line_range_by_func(&lf);
+ else {
+ lf.lno_s = lr->start;
+ if (!lr->end)
+ lf.lno_e = (Dwarf_Unsigned)-1;
+ else
+ lf.lno_e = lr->end;
+ find_line_range_by_line(&lf);
+ }
+ /* Get the real file path */
+ if (lf.found)
+ cu_get_filename(lf.cu_die, lf.fno, &lr->path);
+ }
+ dwarf_dealloc(__dw_debug, lf.cu_die, DW_DLA_DIE);
+ }
+ ret = dwarf_finish(__dw_debug, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ return lf.found;
+}
+
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index e3f3968..972b386 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -34,8 +34,26 @@ struct probe_point {
char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/
};
+/* Line number container */
+struct line_node {
+ struct list_head list;
+ unsigned int line;
+};
+
+/* Line range */
+struct line_range {
+ char *file; /* File name */
+ char *function; /* Function name */
+ unsigned int start; /* Start line number */
+ unsigned int end; /* End line number */
+ unsigned int offset; /* Start line offset */
+ char *path; /* Real path name */
+ struct list_head line_list; /* Visible lines */
+};
+
#ifndef NO_LIBDWARF
extern int find_probepoint(int fd, struct probe_point *pp);
+extern int find_line_range(int fd, struct line_range *lr);
/* Workaround for undefined _MIPS_SZLONG bug in libdwarf.h: */
#ifndef _MIPS_SZLONG
@@ -62,6 +80,19 @@ struct probe_finder {
char *buf; /* Current output buffer */
int len; /* Length of output buffer */
};
+
+struct line_finder {
+ struct line_range *lr; /* Target line range */
+
+ Dwarf_Unsigned fno; /* File number */
+ Dwarf_Unsigned lno_s; /* Start line number */
+ Dwarf_Unsigned lno_e; /* End line number */
+ Dwarf_Addr addr_s; /* Start address */
+ Dwarf_Addr addr_e; /* End address */
+ Dwarf_Die cu_die; /* Current CU */
+ int found;
+};
+
#endif /* NO_LIBDWARF */
#endif /*_PROBE_FINDER_H */
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: mhi...@re...
|
|
From: Masami H. <mhi...@re...> - 2010-01-06 14:32:22
|
Masami Hiramatsu wrote: > Add --line option to support showing probable source-code lines. > > perf probe --line SRC:LN[-LN|+NUM] > or > perf probe --line FUNC[:LN[-LN|+NUM]] > > This option shows source-code with line number if the line can be > probed. Lines without line number (and blue color) means that the > line can not be probed, because debuginfo doesn't have the > information of those lines. > > The argument specifies the range of lines, "source.c:100-120" shows > lines between 100th to 120th in source.c file. And "func:10+20" > shows 20 lines from 10th line of func function. Oops, I found that this will conflict with Arnaldo's patch. I'll rebase this on top of his patches. Thank you, -- Masami Hiramatsu Software Engineer Hitachi Computer Products (America), Inc. Software Solutions Division e-mail: mhi...@re... |
|
From: Masami H. <mhi...@re...> - 2010-01-05 22:42:38
|
Because the function argument syntax has been dropped from kprobe-tracer,
we don't need this API anymore.
Signed-off-by: Masami Hiramatsu <mhi...@re...>
Cc: Ingo Molnar <mi...@el...>
Cc: Frederic Weisbecker <fwe...@gm...>
Cc: Roland McGrath <ro...@re...>
Cc: Oleg Nesterov <ol...@re...>
Cc: Mahesh Salgaonkar <ma...@li...>
Cc: Benjamin Herrenschmidt <be...@ke...>
Cc: Michael Neuling <mi...@ne...>
Cc: Steven Rostedt <ro...@go...>
Cc: lin...@oz...
---
arch/x86/include/asm/ptrace.h | 4 ----
arch/x86/kernel/ptrace.c | 24 ------------------------
2 files changed, 0 insertions(+), 28 deletions(-)
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 9d369f6..2010280 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -274,10 +274,6 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
return 0;
}
-/* Get Nth argument at function call */
-extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
- unsigned int n);
-
/*
* These are defined as per linux/ptrace.h, which see.
*/
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 017d937..73554a3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -140,30 +140,6 @@ static const int arg_offs_table[] = {
#endif
};
-/**
- * regs_get_argument_nth() - get Nth argument at function call
- * @regs: pt_regs which contains registers at function entry.
- * @n: argument number.
- *
- * regs_get_argument_nth() returns @n th argument of a function call.
- * Since usually the kernel stack will be changed right after function entry,
- * you must use this at function entry. If the @n th entry is NOT in the
- * kernel stack or pt_regs, this returns 0.
- */
-unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
-{
- if (n < ARRAY_SIZE(arg_offs_table))
- return *(unsigned long *)((char *)regs + arg_offs_table[n]);
- else {
- /*
- * The typical case: arg n is on the stack.
- * (Note: stack[0] = return address, so skip it)
- */
- n -= ARRAY_SIZE(arg_offs_table);
- return regs_get_kernel_stack_nth(regs, 1 + n);
- }
-}
-
/*
* does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: mhi...@re...
|
|
From: Masami H. <mhi...@re...> - 2010-01-05 22:42:35
|
Enhance strglobmatch() to support character classes ([CHARS];
complementation and ranges are also supported) and escaped special
characters (\*, \?, etc.).
Signed-off-by: Masami Hiramatsu <mhi...@re...>
Cc: Ingo Molnar <mi...@el...>
Cc: Frederic Weisbecker <fwe...@gm...>
Cc: Paul Mackerras <pa...@sa...>
Cc: Arnaldo Carvalho de Melo <ac...@re...>
Cc: Peter Zijlstra <pe...@in...>
Cc: Mike Galbraith <ef...@gm...>
---
tools/perf/util/string.c | 65 +++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 61 insertions(+), 4 deletions(-)
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 5352d7d..c397d4f 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -227,16 +227,73 @@ fail:
return NULL;
}
-/* Glob expression pattern matching */
+/* Character class matching */
+static bool __match_charclass(const char *pat, char c, const char **npat)
+{
+ bool complement = false, ret = true;
+
+ if (*pat == '!') {
+ complement = true;
+ pat++;
+ }
+ if (*pat++ == c) /* First character is special */
+ goto end;
+
+ while (*pat && *pat != ']') { /* Matching */
+ if (*pat == '-' && *(pat + 1) != ']') { /* Range */
+ if (*(pat - 1) <= c && c <= *(pat + 1))
+ goto end;
+ if (*(pat - 1) > *(pat + 1))
+ goto error;
+ pat += 2;
+ } else if (*pat++ == c)
+ goto end;
+ }
+ if (!*pat)
+ goto error;
+ ret = false;
+
+end:
+ while (*pat && *pat != ']') /* Searching closing */
+ pat++;
+ if (!*pat)
+ goto error;
+ *npat = pat + 1;
+ return complement ? !ret : ret;
+
+error:
+ return false;
+}
+
+/**
+ * strglobmatch - glob expression pattern matching
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This returns true if the @str matches @pat. @pat can includes wildcards
+ * ('*','?') and character classes ([CHARS], complementation and ranges are
+ * also supported). Also, this supports escape character ('\') to use special
+ * characters as normal character.
+ *
+ * Note: if @pat syntax is broken, this always returns false.
+ */
bool strglobmatch(const char *str, const char *pat)
{
while (*str && *pat && *pat != '*') {
- if (*pat == '?') {
+ if (*pat == '?') { /* Matches any single character */
str++;
pat++;
- } else
- if (*str++ != *pat++)
+ continue;
+ } else if (*pat == '[') /* Character classes/Ranges */
+ if (__match_charclass(pat + 1, *str, &pat)) {
+ str++;
+ continue;
+ } else
return false;
+ else if (*pat == '\\') /* Escaped char match as normal char */
+ pat++;
+ if (*str++ != *pat++)
+ return false;
}
/* Check wild card */
if (*pat == '*') {
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: mhi...@re...
|
|
From: Masami H. <mhi...@re...> - 2010-01-05 22:42:31
|
Support glob wildcards when selecting tracepoint events with the -e option.
Without this patch, perf-tools supports only the 'GROUP:*:record' syntax,
which selects all tracepoints under the GROUP group. With this patch, users
can choose tracepoints more flexibly by using partial wildcards,
e.g. 'block:*bio*:record'.
Signed-off-by: Masami Hiramatsu <mhi...@re...>
Cc: Ingo Molnar <mi...@el...>
Cc: Frederic Weisbecker <fwe...@gm...>
Cc: Paul Mackerras <pa...@sa...>
Cc: Arnaldo Carvalho de Melo <ac...@re...>
Cc: Peter Zijlstra <pe...@in...>
Cc: Mike Galbraith <ef...@gm...>
---
tools/perf/util/parse-events.c | 11 ++++++++---
1 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 609d5a9..05d0c5c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -450,7 +450,8 @@ parse_single_tracepoint_event(char *sys_name,
/* sys + ':' + event + ':' + flags*/
#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128)
static enum event_result
-parse_subsystem_tracepoint_event(char *sys_name, char *flags)
+parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
+ char *flags)
{
char evt_path[MAXPATHLEN];
struct dirent *evt_ent;
@@ -474,6 +475,9 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags)
|| !strcmp(evt_ent->d_name, "filter"))
continue;
+ if (!strglobmatch(evt_ent->d_name, evt_exp))
+ continue;
+
len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s%s%s", sys_name,
evt_ent->d_name, flags ? ":" : "",
flags ?: "");
@@ -522,9 +526,10 @@ static enum event_result parse_tracepoint_event(const char **strp,
if (evt_length >= MAX_EVENT_LENGTH)
return EVT_FAILED;
- if (!strcmp(evt_name, "*")) {
+ if (strpbrk(evt_name, "*?")) {
*strp = evt_name + evt_length;
- return parse_subsystem_tracepoint_event(sys_name, flags);
+ return parse_multiple_tracepoint_event(sys_name, evt_name,
+ flags);
} else
return parse_single_tracepoint_event(sys_name, evt_name,
evt_length, flags,
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: mhi...@re...
|
|
From: Masami H. <mhi...@re...> - 2010-01-05 22:42:30
|
Add --line option to support showing probeable source-code lines.
perf probe --line SRC:LN[-LN|+NUM]
or
perf probe --line FUNC[:LN[-LN|+NUM]]
This option shows source code with a line number on each line that can be
probed. Lines shown without a line number (and in blue) cannot be
probed, because the debuginfo doesn't have any information for
those lines.
The argument specifies the range of lines: "source.c:100-120" shows
lines 100 through 120 of source.c, and "func:10+20"
shows 20 lines starting from the 10th line of the function func.
e.g.
# ./perf probe --line kernel/sched.c:1080
<kernel/sched.c:1080>
*
* called with rq->lock held and irqs disabled
*/
static void hrtick_start(struct rq *rq, u64 delay)
{
struct hrtimer *timer = &rq->hrtick_timer;
1086 ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
hrtimer_set_expires(timer, time);
1090 if (rq == this_rq()) {
1091 hrtimer_restart(timer);
1092 } else if (!rq->hrtick_csd_pending) {
1093 __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd,
1094 rq->hrtick_csd_pending = 1;
If you specify a function name, this shows function-relative line numbers.
# ./perf probe --line schedule
<schedule:0>
asmlinkage void __sched schedule(void)
1 {
struct task_struct *prev, *next;
unsigned long *switch_count;
struct rq *rq;
int cpu;
need_resched:
preempt_disable();
9 cpu = smp_processor_id();
10 rq = cpu_rq(cpu);
11 rcu_sched_qs(cpu);
12 prev = rq->curr;
13 switch_count = &prev->nivcsw;
Signed-off-by: Masami Hiramatsu <mhi...@re...>
Cc: Ingo Molnar <mi...@el...>
Cc: Frederic Weisbecker <fwe...@gm...>
Cc: Paul Mackerras <pa...@sa...>
Cc: Arnaldo Carvalho de Melo <ac...@re...>
Cc: Peter Zijlstra <pe...@in...>
Cc: Mike Galbraith <ef...@gm...>
---
tools/perf/Documentation/perf-probe.txt | 20 +++
tools/perf/builtin-probe.c | 80 +++++++++++--
tools/perf/util/probe-event.c | 100 ++++++++++++++++
tools/perf/util/probe-event.h | 2
tools/perf/util/probe-finder.c | 191 ++++++++++++++++++++++++++++++-
tools/perf/util/probe-finder.h | 31 +++++
6 files changed, 404 insertions(+), 20 deletions(-)
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 250e391..2de3407 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -15,6 +15,8 @@ or
'perf probe' [options] --del='[GROUP:]EVENT' [...]
or
'perf probe' --list
+or
+'perf probe' --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
DESCRIPTION
-----------
@@ -45,6 +47,11 @@ OPTIONS
--list::
List up current probe events.
+-L::
+--line=::
+ Show source code lines which can be probed. This needs an argument
+ which specifies a range of the source code.
+
PROBE SYNTAX
------------
Probe points are defined by following syntax.
@@ -56,6 +63,19 @@ Probe points are defined by following syntax.
It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number.
'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc).
+LINE SYNTAX
+-----------
+Line range is descripted by following syntax.
+
+ "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]"
+
+FUNC specifies the function name of showing lines. 'RLN' is the start line
+number from function entry line, and 'RLN2' is the end line number. As same as
+probe syntax, 'SRC' means the source file path, 'ALN' is start line number,
+and 'ALN2' is end line number in the file. It is also possible to specify how
+many lines to show by using 'NUM'.
+So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
+
SEE ALSO
--------
linkperf:perf-trace[1], linkperf:perf-record[1]
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index c1e6774..803d3af 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -55,11 +55,13 @@ static struct {
bool need_dwarf;
bool list_events;
bool force_add;
+ bool show_lines;
int nr_probe;
struct probe_point probes[MAX_PROBES];
struct strlist *dellist;
struct perf_session *psession;
struct map *kmap;
+ struct line_range line_range;
} session;
@@ -116,6 +118,15 @@ static int opt_del_probe_event(const struct option *opt __used,
return 0;
}
+static int opt_show_lines(const struct option *opt __used,
+ const char *str, int unset __used)
+{
+ if (str)
+ parse_line_range_desc(str, &session.line_range);
+ INIT_LIST_HEAD(&session.line_range.line_list);
+ session.show_lines = true;
+ return 0;
+}
/* Currently just checking function name from symbol map */
static void evaluate_probe_point(struct probe_point *pp)
{
@@ -144,6 +155,7 @@ static const char * const probe_usage[] = {
"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
"perf probe [<options>] --del '[GROUP:]EVENT' ...",
"perf probe --list",
+ "perf probe --line 'LINEDESC'",
NULL
};
@@ -182,9 +194,34 @@ static const struct option options[] = {
opt_add_probe_event),
OPT_BOOLEAN('f', "force", &session.force_add, "forcibly add events"
" with existing name"),
+#ifndef NO_LIBDWARF
+ OPT_CALLBACK('L', "line", NULL,
+ "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]",
+ "Show source code lines.", opt_show_lines),
+#endif
OPT_END()
};
+/* Initialize symbol maps for vmlinux */
+static void init_vmlinux(void)
+{
+ symbol_conf.sort_by_name = true;
+ if (symbol_conf.vmlinux_name == NULL)
+ symbol_conf.try_vmlinux_path = true;
+ else
+ pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
+ if (symbol__init() < 0)
+ die("Failed to init symbol map.");
+ session.psession = perf_session__new(NULL, O_WRONLY, false);
+ if (session.psession == NULL)
+ die("Failed to init perf_session.");
+ session.kmap = map_groups__find_by_name(&session.psession->kmaps,
+ MAP__FUNCTION,
+ "[kernel.kallsyms]");
+ if (!session.kmap)
+ die("Could not find kernel map.\n");
+}
+
int cmd_probe(int argc, const char **argv, const char *prefix __used)
{
int i, ret;
@@ -203,7 +240,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
parse_probe_event_argv(argc, argv);
}
- if ((!session.nr_probe && !session.dellist && !session.list_events))
+ if ((!session.nr_probe && !session.dellist && !session.list_events &&
+ !session.show_lines))
usage_with_options(probe_usage, options);
if (debugfs_valid_mountpoint(debugfs_path) < 0)
@@ -215,10 +253,34 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
" --add/--del.\n");
usage_with_options(probe_usage, options);
}
+ if (session.show_lines) {
+ pr_warning(" Error: Don't use --list with --line.\n");
+ usage_with_options(probe_usage, options);
+ }
show_perf_probe_events();
return 0;
}
+#ifndef NO_LIBDWARF
+ if (session.show_lines) {
+ if (session.nr_probe != 0 || session.dellist) {
+ pr_warning(" Error: Don't use --line with"
+ " --add/--del.\n");
+ usage_with_options(probe_usage, options);
+ }
+ init_vmlinux();
+ fd = open_vmlinux();
+ if (fd < 0)
+ die("Could not open debuginfo file.");
+ ret = find_line_range(fd, &session.line_range);
+ if (ret <= 0)
+ die("Source line is not found.\n");
+ close(fd);
+ show_line_range(&session.line_range);
+ return 0;
+ }
+#endif
+
if (session.dellist) {
del_trace_kprobe_events(session.dellist);
strlist__delete(session.dellist);
@@ -226,20 +288,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
return 0;
}
- /* Initialize symbol maps for vmlinux */
- symbol_conf.sort_by_name = true;
- if (symbol_conf.vmlinux_name == NULL)
- symbol_conf.try_vmlinux_path = true;
- if (symbol__init() < 0)
- die("Failed to init symbol map.");
- session.psession = perf_session__new(NULL, O_WRONLY, false);
- if (session.psession == NULL)
- die("Failed to init perf_session.");
- session.kmap = map_groups__find_by_name(&session.psession->kmaps,
- MAP__FUNCTION,
- "[kernel.kallsyms]");
- if (!session.kmap)
- die("Could not find kernel map.\n");
+ /* Add probes */
+ init_vmlinux();
if (session.need_dwarf)
#ifdef NO_LIBDWARF
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index a22141a..71b0dd5 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -38,6 +38,7 @@
#include "strlist.h"
#include "debug.h"
#include "cache.h"
+#include "color.h"
#include "parse-events.h" /* For debugfs_path */
#include "probe-event.h"
@@ -63,6 +64,42 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
return ret;
}
+void parse_line_range_desc(const char *arg, struct line_range *lr)
+{
+ const char *ptr;
+ char *tmp;
+ /*
+ * <Syntax>
+ * SRC:SLN[+NUM|-ELN]
+ * FUNC[:SLN[+NUM|-ELN]]
+ */
+ ptr = strchr(arg, ':');
+ if (ptr) {
+ lr->start = (unsigned int)strtoul(ptr + 1, &tmp, 0);
+ if (*tmp == '+')
+ lr->end = lr->start + (unsigned int)strtoul(tmp + 1,
+ &tmp, 0);
+ else if (*tmp == '-')
+ lr->end = (unsigned int)strtoul(tmp + 1, &tmp, 0);
+ else
+ lr->end = 0;
+ pr_debug("Line range is %u to %u\n", lr->start, lr->end);
+ if (lr->end && lr->start > lr->end)
+ semantic_error("Start line must be smaller"
+ " than end line.");
+ if (*tmp != '\0')
+ semantic_error("Tailing with invalid character '%d'.",
+ *tmp);
+ tmp = strndup(arg, (ptr - arg));
+ } else
+ tmp = strdup(arg);
+
+ if (strchr(tmp, '.'))
+ lr->file = tmp;
+ else
+ lr->function = tmp;
+}
+
/* Check the name is good for event/group */
static bool check_event_name(const char *name)
{
@@ -678,3 +715,66 @@ void del_trace_kprobe_events(struct strlist *dellist)
close(fd);
}
+#define LINEBUF_SIZE 256
+
+static void show_one_line(FILE *fp, unsigned int l, bool skip, bool show_num)
+{
+ char buf[LINEBUF_SIZE];
+ const char *color = PERF_COLOR_BLUE;
+
+ if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+ goto error;
+ if (!skip) {
+ if (show_num)
+ fprintf(stdout, "%7u %s", l, buf);
+ else
+ color_fprintf(stdout, color, " %s", buf);
+ }
+
+ while (strlen(buf) == LINEBUF_SIZE - 1 &&
+ buf[LINEBUF_SIZE - 2] != '\n') {
+ if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+ goto error;
+ if (!skip) {
+ if (show_num)
+ fprintf(stdout, "%s", buf);
+ else
+ color_fprintf(stdout, color, "%s", buf);
+ }
+ }
+ return;
+error:
+ if (feof(fp))
+ die("Source file is shorter than expected.");
+ else
+ die("File read error: %s", strerror(errno));
+}
+
+void show_line_range(struct line_range *lr)
+{
+ unsigned int l = 1;
+ struct line_node *ln;
+ FILE *fp;
+
+ setup_pager();
+
+ if (lr->function)
+ fprintf(stdout, "<%s:%d>\n", lr->function,
+ lr->start - lr->offset);
+ else
+ fprintf(stdout, "<%s:%d>\n", lr->file, lr->start);
+
+ fp = fopen(lr->path, "r");
+ if (fp == NULL)
+ die("Failed to open %s: %s", lr->path, strerror(errno));
+ /* Skip to starting line number */
+ while (l < lr->start)
+ show_one_line(fp, l++, true, false);
+
+ list_for_each_entry(ln, &lr->line_list, list) {
+ while (ln->line > l)
+ show_one_line(fp, (l++) - lr->offset, false, false);
+ show_one_line(fp, (l++) - lr->offset, false, true);
+ }
+ fclose(fp);
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 7f1d499..711287d 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -5,6 +5,7 @@
#include "probe-finder.h"
#include "strlist.h"
+extern void parse_line_range_desc(const char *arg, struct line_range *lr);
extern void parse_perf_probe_event(const char *str, struct probe_point *pp,
bool *need_dwarf);
extern int synthesize_perf_probe_point(struct probe_point *pp);
@@ -15,6 +16,7 @@ extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes,
bool force_add);
extern void del_trace_kprobe_events(struct strlist *dellist);
extern void show_perf_probe_events(void);
+extern void show_line_range(struct line_range *lr);
/* Maximum index number of event-name postfix */
#define MAX_EVENT_INDEX 1024
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 6402798..1b2124d 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -140,6 +140,31 @@ static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname)
return found;
}
+static int cu_get_filename(Dwarf_Die cu_die, Dwarf_Unsigned fno, char **buf)
+{
+ Dwarf_Signed cnt, i;
+ char **srcs;
+ int ret = 0;
+
+ if (!buf || !fno)
+ return -EINVAL;
+
+ ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error);
+ if (ret == DW_DLV_OK) {
+ if ((Dwarf_Unsigned)cnt > fno - 1) {
+ *buf = strdup(srcs[fno - 1]);
+ ret = 0;
+ pr_debug("found filename: %s\n", *buf);
+ } else
+ ret = -ENOENT;
+ for (i = 0; i < cnt; i++)
+ dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
+ dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST);
+ } else
+ ret = -EINVAL;
+ return ret;
+}
+
/* Compare diename and tname */
static int die_compare_name(Dwarf_Die dw_die, const char *tname)
{
@@ -567,7 +592,7 @@ static int probeaddr_callback(struct die_link *dlink, void *data)
}
/* Find probe point from its line number */
-static void find_by_line(struct probe_finder *pf)
+static void find_probe_point_by_line(struct probe_finder *pf)
{
Dwarf_Signed cnt, i, clm;
Dwarf_Line *lines;
@@ -626,7 +651,7 @@ static int probefunc_callback(struct die_link *dlink, void *data)
pf->fno = die_get_decl_file(dlink->die);
pf->lno = die_get_decl_line(dlink->die)
+ pp->line;
- find_by_line(pf);
+ find_probe_point_by_line(pf);
return 1;
}
if (die_inlined_subprogram(dlink->die)) {
@@ -673,7 +698,7 @@ found:
return 0;
}
-static void find_by_func(struct probe_finder *pf)
+static void find_probe_point_by_func(struct probe_finder *pf)
{
search_die_from_children(pf->cu_die, probefunc_callback, pf);
}
@@ -714,10 +739,10 @@ int find_probepoint(int fd, struct probe_point *pp)
if (ret == DW_DLV_NO_ENTRY)
pf.cu_base = 0;
if (pp->function)
- find_by_func(&pf);
+ find_probe_point_by_func(&pf);
else {
pf.lno = pp->line;
- find_by_line(&pf);
+ find_probe_point_by_line(&pf);
}
}
dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE);
@@ -728,3 +753,159 @@ int find_probepoint(int fd, struct probe_point *pp)
return pp->found;
}
+
+static void line_range_add_line(struct line_range *lr, unsigned int line)
+{
+ struct line_node *ln;
+ struct list_head *p;
+
+ /* Reverse search, because new line will be the last one */
+ list_for_each_entry_reverse(ln, &lr->line_list, list) {
+ if (ln->line < line) {
+ p = &ln->list;
+ goto found;
+ } else if (ln->line == line) /* Already exist */
+ return ;
+ }
+ /* List is empty, or the smallest entry */
+ p = &lr->line_list;
+found:
+ pr_debug("Debug: add a line %u\n", line);
+ ln = zalloc(sizeof(struct line_node));
+ DIE_IF(ln == NULL);
+ ln->line = line;
+ INIT_LIST_HEAD(&ln->list);
+ list_add(&ln->list, p);
+}
+
+/* Find line range from its line number */
+static void find_line_range_by_line(struct line_finder *lf)
+{
+ Dwarf_Signed cnt, i;
+ Dwarf_Line *lines;
+ Dwarf_Unsigned lineno = 0;
+ Dwarf_Unsigned fno;
+ Dwarf_Addr addr;
+ int ret;
+
+ ret = dwarf_srclines(lf->cu_die, &lines, &cnt, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+
+ for (i = 0; i < cnt; i++) {
+ ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ if (fno != lf->fno)
+ continue;
+
+ ret = dwarf_lineno(lines[i], &lineno, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ if (lf->lno_s > lineno || lf->lno_e < lineno)
+ continue;
+
+ /* Filter line in the function address range */
+ if (lf->addr_s && lf->addr_e) {
+ ret = dwarf_lineaddr(lines[i], &addr, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ if (lf->addr_s > addr || lf->addr_e <= addr)
+ continue;
+ }
+ line_range_add_line(lf->lr, (unsigned int)lineno);
+ }
+ dwarf_srclines_dealloc(__dw_debug, lines, cnt);
+ if (!list_empty(&lf->lr->line_list))
+ lf->found = 1;
+}
+
+/* Search function from function name */
+static int linefunc_callback(struct die_link *dlink, void *data)
+{
+ struct line_finder *lf = (struct line_finder *)data;
+ struct line_range *lr = lf->lr;
+ Dwarf_Half tag;
+ int ret;
+
+ ret = dwarf_tag(dlink->die, &tag, &__dw_error);
+ DIE_IF(ret == DW_DLV_ERROR);
+ if (tag == DW_TAG_subprogram &&
+ die_compare_name(dlink->die, lr->function) == 0) {
+ /* Get the address range of this function */
+ ret = dwarf_highpc(dlink->die, &lf->addr_e, &__dw_error);
+ if (ret == DW_DLV_OK)
+ ret = dwarf_lowpc(dlink->die, &lf->addr_s, &__dw_error);
+ DIE_IF(ret == DW_DLV_ERROR);
+ if (ret == DW_DLV_NO_ENTRY) {
+ lf->addr_s = 0;
+ lf->addr_e = 0;
+ }
+
+ lf->fno = die_get_decl_file(dlink->die);
+ lr->offset = die_get_decl_line(dlink->die);;
+ lf->lno_s = lr->offset + lr->start;
+ if (!lr->end)
+ lf->lno_e = (Dwarf_Unsigned)-1;
+ else
+ lf->lno_e = lr->offset + lr->end;
+ lr->start = lf->lno_s;
+ lr->end = lf->lno_e;
+ find_line_range_by_line(lf);
+ /* If we find a target function, this should be end. */
+ lf->found = 1;
+ return 1;
+ }
+ return 0;
+}
+
+static void find_line_range_by_func(struct line_finder *lf)
+{
+ search_die_from_children(lf->cu_die, linefunc_callback, lf);
+}
+
+int find_line_range(int fd, struct line_range *lr)
+{
+ Dwarf_Half addr_size = 0;
+ Dwarf_Unsigned next_cuh = 0;
+ int ret;
+ struct line_finder lf = {.lr = lr};
+
+ ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error);
+ if (ret != DW_DLV_OK)
+ return -ENOENT;
+
+ while (!lf.found) {
+ /* Search CU (Compilation Unit) */
+ ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL,
+ &addr_size, &next_cuh, &__dw_error);
+ DIE_IF(ret == DW_DLV_ERROR);
+ if (ret == DW_DLV_NO_ENTRY)
+ break;
+
+ /* Get the DIE(Debugging Information Entry) of this CU */
+ ret = dwarf_siblingof(__dw_debug, 0, &lf.cu_die, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+
+ /* Check if target file is included. */
+ if (lr->file)
+ lf.fno = cu_find_fileno(lf.cu_die, lr->file);
+
+ if (!lr->file || lf.fno) {
+ if (lr->function)
+ find_line_range_by_func(&lf);
+ else {
+ lf.lno_s = lr->start;
+ if (!lr->end)
+ lf.lno_e = (Dwarf_Unsigned)-1;
+ else
+ lf.lno_e = lr->end;
+ find_line_range_by_line(&lf);
+ }
+ /* Get the real file path */
+ if (lf.found)
+ cu_get_filename(lf.cu_die, lf.fno, &lr->path);
+ }
+ dwarf_dealloc(__dw_debug, lf.cu_die, DW_DLA_DIE);
+ }
+ ret = dwarf_finish(__dw_debug, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ return lf.found;
+}
+
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index e3f3968..972b386 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -34,8 +34,26 @@ struct probe_point {
char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/
};
+/* Line number container */
+struct line_node {
+ struct list_head list;
+ unsigned int line;
+};
+
+/* Line range */
+struct line_range {
+ char *file; /* File name */
+ char *function; /* Function name */
+ unsigned int start; /* Start line number */
+ unsigned int end; /* End line number */
+ unsigned int offset; /* Start line offset */
+ char *path; /* Real path name */
+ struct list_head line_list; /* Visible lines */
+};
+
#ifndef NO_LIBDWARF
extern int find_probepoint(int fd, struct probe_point *pp);
+extern int find_line_range(int fd, struct line_range *lr);
/* Workaround for undefined _MIPS_SZLONG bug in libdwarf.h: */
#ifndef _MIPS_SZLONG
@@ -62,6 +80,19 @@ struct probe_finder {
char *buf; /* Current output buffer */
int len; /* Length of output buffer */
};
+
+struct line_finder {
+ struct line_range *lr; /* Target line range */
+
+ Dwarf_Unsigned fno; /* File number */
+ Dwarf_Unsigned lno_s; /* Start line number */
+ Dwarf_Unsigned lno_e; /* End line number */
+ Dwarf_Addr addr_s; /* Start address */
+ Dwarf_Addr addr_e; /* End address */
+ Dwarf_Die cu_die; /* Current CU */
+ int found;
+};
+
#endif /* NO_LIBDWARF */
#endif /*_PROBE_FINDER_H */
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: mhi...@re...
|
|
From: Masami H. <mhi...@re...> - 2010-01-05 22:42:23
|
Drop function argument access syntax, because how function arguments
are passed depends not only on the architecture but also on compile
options and the function's API. Besides, we now have perf-probe for
finding the register/memory assigned to each argument.
Signed-off-by: Masami Hiramatsu <mhi...@re...>
Cc: Ingo Molnar <mi...@el...>
Cc: Frederic Weisbecker <fwe...@gm...>
Cc: Steven Rostedt <ro...@go...>
Cc: Roland McGrath <ro...@re...>
Cc: Oleg Nesterov <ol...@re...>
Cc: Mahesh Salgaonkar <ma...@li...>
Cc: Benjamin Herrenschmidt <be...@ke...>
Cc: Michael Neuling <mi...@ne...>
Cc: lin...@oz...
---
Documentation/trace/kprobetrace.txt | 21 ++++++++++-----------
kernel/trace/trace_kprobe.c | 18 +-----------------
2 files changed, 11 insertions(+), 28 deletions(-)
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index c3eff6f..f30978e 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -37,15 +37,12 @@ Synopsis of kprobe_events
@SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol)
$stackN : Fetch Nth entry of stack (N >= 0)
$stack : Fetch stack address.
- $argN : Fetch function argument. (N >= 0)(*)
- $retval : Fetch return value.(**)
- +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***)
+ $retval : Fetch return value.(*)
+ +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
NAME=FETCHARG: Set NAME as the argument name of FETCHARG.
- (*) aN may not correct on asmlinkaged functions and at the middle of
- function body.
- (**) only for return probe.
- (***) this is useful for fetching a field of data structures.
+ (*) only for return probe.
+ (**) this is useful for fetching a field of data structures.
Per-Probe Event Filtering
@@ -82,11 +79,14 @@ Usage examples
To add a probe as a new event, write a new definition to kprobe_events
as below.
- echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events
+ echo p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack) > /sys/kernel/debug/tracing/kprobe_events
This sets a kprobe on the top of do_sys_open() function with recording
-1st to 4th arguments as "myprobe" event. As this example shows, users can
-choose more familiar names for each arguments.
+1st to 4th arguments as "myprobe" event. Note, which register/stack entry is
+assigned to each function argument depends on arch-specific ABI. If you unsure
+the ABI, please try to use probe subcommand of perf-tools (you can find it
+under tools/perf/).
+As this example shows, users can choose more familiar names for each arguments.
echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events
@@ -147,4 +147,3 @@ events, you need to enable it.
returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
returns from do_sys_open to sys_open+0x1b).
-
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 47f54ab..7ac728d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
return retval;
}
-static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
-{
- return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
-}
-
static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
void *dummy)
{
@@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
{
int ret = -EINVAL;
- if (ff->func == fetch_argument)
- ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
- else if (ff->func == fetch_register) {
+ if (ff->func == fetch_register) {
const char *name;
name = regs_query_register_name((unsigned int)((long)ff->data));
ret = snprintf(buf, n, "%%%s", name);
@@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
}
} else
ret = -EINVAL;
- } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
- ret = strict_strtoul(arg + 3, 10, ¶m);
- if (ret || param > PARAM_MAX_ARGS)
- ret = -EINVAL;
- else {
- ff->func = fetch_argument;
- ff->data = (void *)param;
- }
} else
ret = -EINVAL;
return ret;
@@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv)
* - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
* - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
* Fetch args:
- * $argN : fetch Nth of function argument. (N:0-)
* $retval : fetch return value
* $stack : fetch stack address
* $stackN : fetch Nth of stack (N:0-)
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: mhi...@re...
|
|
From: Masami H. <mhi...@re...> - 2010-01-05 22:42:22
|
Update example output in documentation accroding to current implementation. Signed-off-by: Masami Hiramatsu <mhi...@re...> Cc: Ingo Molnar <mi...@el...> Cc: Steven Rostedt <ro...@go...> Cc: Frederic Weisbecker <fwe...@gm...> --- Documentation/trace/kprobetrace.txt | 33 +++++++++++++++++---------------- 1 files changed, 17 insertions(+), 16 deletions(-) diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 47aabee..c3eff6f 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -97,23 +97,24 @@ recording return value as "myretprobe" event. cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format name: myprobe -ID: 75 +ID: 780 format: - field:unsigned short common_type; offset:0; size:2; - field:unsigned char common_flags; offset:2; size:1; - field:unsigned char common_preempt_count; offset:3; size:1; - field:int common_pid; offset:4; size:4; - field:int common_tgid; offset:8; size:4; - - field: unsigned long ip; offset:16;tsize:8; - field: int nargs; offset:24;tsize:4; - field: unsigned long dfd; offset:32;tsize:8; - field: unsigned long filename; offset:40;tsize:8; - field: unsigned long flags; offset:48;tsize:8; - field: unsigned long mode; offset:56;tsize:8; - -print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode - + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1;signed:0; + field:int common_pid; offset:4; size:4; signed:1; + field:int common_lock_depth; offset:8; size:4; signed:1; + + field:unsigned long __probe_ip; offset:12; size:4; signed:0; + field:int __probe_nargs; offset:16; size:4; signed:1; + field:unsigned long dfd; offset:20; size:4; signed:0; + field:unsigned long filename; offset:24; size:4; signed:0; + field:unsigned long flags; offset:28; size:4; signed:0; + 
field:unsigned long mode; offset:32; size:4; signed:0; + + +print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->__probe_ip, +REC->dfd, REC->filename, REC->flags, REC->mode You can see that the event has 4 arguments as in the expressions you specified. -- Masami Hiramatsu Software Engineer Hitachi Computer Products (America), Inc. Software Solutions Division e-mail: mhi...@re... |
|
From: Masami H. <mhi...@re...> - 2010-01-05 22:42:20
|
Hi Ingo,
Here are several bugfixes and updates of perf-probe and
kprobe-tracer.
This update includes the --line option support which you have been
waiting for :-).
For kprobe-tracer, I decided to drop $argN support, because
the ABI for each function strongly depends not only on the
architecture but also on the API of the function and gcc options.
Anyway, we already have perf-probe, which allows us to find the
register/memory assignment of each argument.
Here is the updated todo list.
Long-term TODOs (future features):
- Support lazy string matching(glob?) for selecting probing
line
- Support sys_perf_counter_open (for non-root users)
- Support tracing static variables (non global)
- Support variable types from debuginfo (e.g. char, int, ...)
- Support fields of data structures (var->field)
- Support array (var[N])
- Support dynamic array-indexing (var[var2])
- Support string/dynamic arrays (*var, var[N..M])
- Support force type-casting ((type)var)
- Support the type of return value
Miscs:
- Better support for probes on modules
- Move onto libdw/libdwfl
- Storing file name/line number information in the
kernel for listing events
Thank you,
---
Masami Hiramatsu (8):
perf probe: Support --line option to show probable source-code lines
perf tools: Enhance glob string matching
perf tools: Support tracepoint glob matching
perf probe: Show probe list in pager
[CLEANUP] perf probe: Remove newline from die()
x86/ptrace: Remove unused regs_get_argument_nth API
tracing/kprobe: Drop function argument access syntax
tracing/kprobe: Update example output in documentation
Documentation/trace/kprobetrace.txt | 48 ++++---
arch/x86/include/asm/ptrace.h | 4 -
arch/x86/kernel/ptrace.c | 24 ----
kernel/trace/trace_kprobe.c | 18 ---
tools/perf/Documentation/perf-probe.txt | 20 +++
tools/perf/builtin-probe.c | 80 ++++++++++--
tools/perf/util/parse-events.c | 11 +-
tools/perf/util/probe-event.c | 103 ++++++++++++++++
tools/perf/util/probe-event.h | 2
tools/perf/util/probe-finder.c | 203 +++++++++++++++++++++++++++++--
tools/perf/util/probe-finder.h | 31 +++++
tools/perf/util/string.c | 65 +++++++++-
12 files changed, 507 insertions(+), 102 deletions(-)
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: mhi...@re...
|
|
From: Masami H. <mhi...@re...> - 2010-01-05 22:41:10
|
Show probe list in pager, because the list can be longer than a page. Signed-off-by: Masami Hiramatsu <mhi...@re...> Cc: Ingo Molnar <mi...@el...> Cc: Frederic Weisbecker <fwe...@gm...> Cc: Paul Mackerras <pa...@sa...> Cc: Arnaldo Carvalho de Melo <ac...@re...> Cc: Peter Zijlstra <pe...@in...> Cc: Mike Galbraith <ef...@gm...> --- tools/perf/util/probe-event.c | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8e532d9..a22141a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -37,6 +37,7 @@ #include "string.h" #include "strlist.h" #include "debug.h" +#include "cache.h" #include "parse-events.h" /* For debugfs_path */ #include "probe-event.h" @@ -455,6 +456,8 @@ void show_perf_probe_events(void) struct strlist *rawlist; struct str_node *ent; + setup_pager(); + fd = open_kprobe_events(O_RDONLY, 0); rawlist = get_trace_kprobe_event_rawlist(fd); close(fd); -- Masami Hiramatsu Software Engineer Hitachi Computer Products (America), Inc. Software Solutions Division e-mail: mhi...@re... |
|
From: Masami H. <mhi...@re...> - 2010-01-05 22:41:09
|
Remove newline from die(), because it is automatically added.
Signed-off-by: Masami Hiramatsu <mhi...@re...>
Cc: Ingo Molnar <mi...@el...>
Cc: Frederic Weisbecker <fwe...@gm...>
Cc: Paul Mackerras <pa...@sa...>
Cc: Arnaldo Carvalho de Melo <ac...@re...>
Cc: Peter Zijlstra <pe...@in...>
Cc: Mike Galbraith <ef...@gm...>
---
tools/perf/util/probe-finder.c | 12 ++++++------
1 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 4b852c0..6402798 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -402,11 +402,11 @@ static void show_location(Dwarf_Loc *loc, struct probe_finder *pf)
} else if (op == DW_OP_regx) {
regn = loc->lr_number;
} else
- die("Dwarf_OP %d is not supported.\n", op);
+ die("Dwarf_OP %d is not supported.", op);
regs = get_arch_regstr(regn);
if (!regs)
- die("%lld exceeds max register number.\n", regn);
+ die("%lld exceeds max register number.", regn);
if (deref)
ret = snprintf(pf->buf, pf->len,
@@ -438,7 +438,7 @@ static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf)
return ;
error:
die("Failed to find the location of %s at this address.\n"
- " Perhaps, it has been optimized out.\n", pf->var);
+ " Perhaps, it has been optimized out.", pf->var);
}
static int variable_callback(struct die_link *dlink, void *data)
@@ -476,7 +476,7 @@ static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf)
/* Search child die for local variables and parameters. */
ret = search_die_from_children(sp_die, variable_callback, pf);
if (!ret)
- die("Failed to find '%s' in this function.\n", pf->var);
+ die("Failed to find '%s' in this function.", pf->var);
}
/* Get a frame base on the address */
@@ -602,7 +602,7 @@ static void find_by_line(struct probe_finder *pf)
ret = search_die_from_children(pf->cu_die,
probeaddr_callback, pf);
if (ret == 0)
- die("Probe point is not found in subprograms.\n");
+ die("Probe point is not found in subprograms.");
/* Continuing, because target line might be inlined. */
}
dwarf_srclines_dealloc(__dw_debug, lines, cnt);
@@ -661,7 +661,7 @@ static int probefunc_callback(struct die_link *dlink, void *data)
!die_inlined_subprogram(lk->die))
goto found;
}
- die("Failed to find real subprogram.\n");
+ die("Failed to find real subprogram.");
found:
/* Get offset from subprogram */
ret = die_within_subprogram(lk->die, pf->addr, &offs);
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: mhi...@re...
|
|
From: KOSAKI M. <kos...@jp...> - 2009-12-21 01:53:57
|
> Pass mm->flags as a coredump parameter for consistency.
>
> ---
> 1787 if (mm->core_state || !get_dumpable(mm)) { <- (1)
> 1788 up_write(&mm->mmap_sem);
> 1789 put_cred(cred);
> 1790 goto fail;
> 1791 }
> 1792
> [...]
> 1798 if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ <-(2)
> 1799 flag = O_EXCL; /* Stop rewrite attacks */
> 1800 cred->fsuid = 0; /* Dump root private */
> 1801 }
> ---
>
> Since dumpable bits are not protected by lock, there is a
> chance to change these bits between (1) and (2).
>
> To solve this issue, this patch copies mm->flags to
> coredump_params.mm_flags at the beginning of do_coredump() and uses it instead of get_dumpable() while dumping core.
>
> This copy is also passed to binfmt->core_dump, since
> elf*_core_dump() uses dump_filter bits in mm->flags.
>
> Signed-off-by: Masami Hiramatsu <mhi...@re...>
> Cc: Roland McGrath <ro...@re...>
> Cc: Hidehiro Kawai <hid...@hi...>
> Cc: Andrew Morton <ak...@li...>
> Cc: Oleg Nesterov <ol...@re...>
> Cc: Ingo Molnar <mi...@el...>
> Cc: KOSAKI Motohiro <kos...@jp...>
looks good to me.
Reviewed-by: KOSAKI Motohiro <kos...@jp...>
|
|
From: Roland M. <ro...@re...> - 2009-12-18 19:53:11
|
Acked-by: Roland McGrath <ro...@re...> |
|
From: Masami H. <mhi...@re...> - 2009-12-18 18:13:05
|
Use text_poke_fixup() for jump optimization instead of text_poke_smp(). Signed-off-by: Masami Hiramatsu <mhi...@re...> Cc: Ananth N Mavinakayanahalli <an...@in...> Cc: Ingo Molnar <mi...@el...> Cc: Jim Keniston <jke...@us...> Cc: Srikar Dronamraju <sr...@li...> Cc: Christoph Hellwig <hc...@in...> Cc: Steven Rostedt <ro...@go...> Cc: Frederic Weisbecker <fwe...@gm...> Cc: H. Peter Anvin <hp...@zy...> Cc: Anders Kaseorg <an...@ks...> Cc: Tim Abbott <ta...@ks...> Cc: Andi Kleen <an...@fi...> Cc: Jason Baron <jb...@re...> Cc: Mathieu Desnoyers <co...@kr...> --- arch/x86/kernel/kprobes.c | 10 +++------- 1 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 9d2c873..8f2e0d2 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1376,12 +1376,8 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) jmp_code[0] = RELATIVEJUMP_OPCODE; *(s32 *)(&jmp_code[1]) = rel; - /* - * text_poke_smp doesn't support NMI/MCE code modifying. - * However, since kprobes itself also doesn't support NMI/MCE - * code probing, it's not a problem. - */ - text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); + text_poke_fixup(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE, + op->optinsn.insn); return 0; } @@ -1393,7 +1389,7 @@ void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) /* Set int3 to first byte for kprobes */ buf[0] = BREAKPOINT_INSTRUCTION; memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); + text_poke_fixup(op->kp.addr, buf, RELATIVEJUMP_SIZE, op->optinsn.insn); } static int __kprobes setup_detour_execution(struct kprobe *p, -- Masami Hiramatsu Software Engineer Hitachi Computer Products (America), Inc. Software Solutions Division e-mail: mhi...@re... |
|
From: Masami H. <mhi...@re...> - 2009-12-18 18:12:54
|
Introduce x86 arch-specific optimization code, which supports both
x86-32 and x86-64.
This code also supports safety checking, which decodes the whole function
in which the probe is inserted, and checks the following conditions before
optimization:
- The optimized instructions which will be replaced by a jump instruction
don't straddle the function boundary.
- There is no indirect jump instruction, because it might jump into
the address range which is replaced by the jump operand.
- There is no jump/loop instruction which jumps into the address range
which is replaced by jump operand.
- Don't optimize a kprobe if it is in a function into which fixup code
will jump.
This uses text_poke_multibyte(), which doesn't support modifying code in
NMI/MCE handlers. However, since kprobes itself doesn't support probing
NMI/MCE code, it's not a problem.
Changes in v6:
- Split stop_machine-based jump patching code.
- Update comments and coding style.
Changes in v5:
- Introduce stop_machine-based jump replacing.
Signed-off-by: Masami Hiramatsu <mhi...@re...>
Cc: Ananth N Mavinakayanahalli <an...@in...>
Cc: Ingo Molnar <mi...@el...>
Cc: Jim Keniston <jke...@us...>
Cc: Srikar Dronamraju <sr...@li...>
Cc: Christoph Hellwig <hc...@in...>
Cc: Steven Rostedt <ro...@go...>
Cc: Frederic Weisbecker <fwe...@gm...>
Cc: H. Peter Anvin <hp...@zy...>
Cc: Anders Kaseorg <an...@ks...>
Cc: Tim Abbott <ta...@ks...>
Cc: Andi Kleen <an...@fi...>
Cc: Jason Baron <jb...@re...>
Cc: Mathieu Desnoyers <co...@kr...>
---
arch/x86/Kconfig | 1
arch/x86/include/asm/kprobes.h | 29 +++
arch/x86/kernel/kprobes.c | 420 ++++++++++++++++++++++++++++++++++++++--
3 files changed, 428 insertions(+), 22 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cbc1b98..9da8db5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -31,6 +31,7 @@ config X86
select ARCH_WANT_FRAME_POINTERS
select HAVE_DMA_ATTRS
select HAVE_KRETPROBES
+ select HAVE_OPTPROBES
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index eaec8ea..4ffa345 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -33,6 +33,9 @@ struct kprobe;
typedef u8 kprobe_opcode_t;
#define BREAKPOINT_INSTRUCTION 0xcc
#define RELATIVEJUMP_OPCODE 0xe9
+#define RELATIVEJUMP_SIZE 5
+#define RELATIVECALL_OPCODE 0xe8
+#define RELATIVE_ADDR_SIZE 4
#define MAX_INSN_SIZE 16
#define MAX_STACK_SIZE 64
#define MIN_STACK_SIZE(ADDR) \
@@ -44,6 +47,17 @@ typedef u8 kprobe_opcode_t;
#define flush_insn_slot(p) do { } while (0)
+/* optinsn template addresses */
+extern kprobe_opcode_t optprobe_template_entry;
+extern kprobe_opcode_t optprobe_template_val;
+extern kprobe_opcode_t optprobe_template_call;
+extern kprobe_opcode_t optprobe_template_end;
+#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
+#define MAX_OPTINSN_SIZE \
+ (((unsigned long)&optprobe_template_end - \
+ (unsigned long)&optprobe_template_entry) + \
+ MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
+
extern const int kretprobe_blacklist_size;
void arch_remove_kprobe(struct kprobe *p);
@@ -64,6 +78,21 @@ struct arch_specific_insn {
int boostable;
};
+struct arch_optimized_insn {
+ /* copy of the original instructions */
+ kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
+ /* detour code buffer */
+ kprobe_opcode_t *insn;
+ /* the size of instructions copied to detour code buffer */
+ size_t size;
+};
+
+/* Return true (!0) if optinsn is prepared for optimization. */
+static inline int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
+{
+ return optinsn->size;
+}
+
struct prev_kprobe {
struct kprobe *kp;
unsigned long status;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index f848984..9d2c873 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -106,16 +106,22 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
};
const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
-/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-static void __kprobes set_jmp_op(void *from, void *to)
+static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
{
- struct __arch_jmp_op {
- char op;
+ struct __arch_relative_insn {
+ u8 op;
s32 raddr;
- } __attribute__((packed)) * jop;
- jop = (struct __arch_jmp_op *)from;
- jop->raddr = (s32)((long)(to) - ((long)(from) + 5));
- jop->op = RELATIVEJUMP_OPCODE;
+ } __attribute__((packed)) *insn;
+
+ insn = (struct __arch_relative_insn *)from;
+ insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
+ insn->op = op;
+}
+
+/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
+static void __kprobes synthesize_reljump(void *from, void *to)
+{
+ __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
}
/*
@@ -202,7 +208,7 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
/*
* Basically, kp->ainsn.insn has an original instruction.
* However, RIP-relative instruction can not do single-stepping
- * at different place, fix_riprel() tweaks the displacement of
+ * at different place, __copy_instruction() tweaks the displacement of
* that instruction. In that case, we can't recover the instruction
* from the kp->ainsn.insn.
*
@@ -284,21 +290,37 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
}
/*
- * Adjust the displacement if the instruction uses the %rip-relative
- * addressing mode.
+ * Copy an instruction and adjust the displacement if the instruction
+ * uses the %rip-relative addressing mode.
* If it does, Return the address of the 32-bit displacement word.
* If not, return null.
* Only applicable to 64-bit x86.
*/
-static void __kprobes fix_riprel(struct kprobe *p)
+static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
{
-#ifdef CONFIG_X86_64
struct insn insn;
- kernel_insn_init(&insn, p->ainsn.insn);
+ int ret;
+ kprobe_opcode_t buf[MAX_INSN_SIZE];
+ kernel_insn_init(&insn, src);
+ if (recover) {
+ insn_get_opcode(&insn);
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+ ret = recover_probed_instruction(buf,
+ (unsigned long)src);
+ if (ret)
+ return 0;
+ kernel_insn_init(&insn, buf);
+ }
+ }
+ insn_get_length(&insn);
+ memcpy(dest, insn.kaddr, insn.length);
+
+#ifdef CONFIG_X86_64
if (insn_rip_relative(&insn)) {
s64 newdisp;
u8 *disp;
+ kernel_insn_init(&insn, dest);
insn_get_displacement(&insn);
/*
* The copied instruction uses the %rip-relative addressing
@@ -312,20 +334,23 @@ static void __kprobes fix_riprel(struct kprobe *p)
* extension of the original signed 32-bit displacement would
* have given.
*/
- newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
- (u8 *) p->ainsn.insn;
+ newdisp = (u8 *) src + (s64) insn.displacement.value -
+ (u8 *) dest;
BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
- disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
+ disp = (u8 *) dest + insn_offset_displacement(&insn);
*(s32 *) disp = (s32) newdisp;
}
#endif
+ return insn.length;
}
static void __kprobes arch_copy_kprobe(struct kprobe *p)
{
- memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
-
- fix_riprel(p);
+ /*
+ * Copy an instruction without recovering int3, because it will be
+ * put by another subsystem.
+ */
+ __copy_instruction(p->ainsn.insn, p->addr, 0);
if (can_boost(p->addr))
p->ainsn.boostable = 0;
@@ -414,9 +439,20 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
*sara = (unsigned long) &kretprobe_trampoline;
}
+#ifdef CONFIG_OPTPROBES
+static int __kprobes setup_detour_execution(struct kprobe *p,
+ struct pt_regs *regs,
+ int reenter);
+#else
+#define setup_detour_execution(p, regs, reenter) (0)
+#endif
+
static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb, int reenter)
{
+ if (setup_detour_execution(p, regs, reenter))
+ return;
+
#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER)
if (p->ainsn.boostable == 1 && !p->post_handler) {
/* Boost up -- we can execute copied instructions directly */
@@ -812,8 +848,8 @@ static void __kprobes resume_execution(struct kprobe *p,
* These instructions can be executed directly if it
* jumps back to correct address.
*/
- set_jmp_op((void *)regs->ip,
- (void *)orig_ip + (regs->ip - copy_ip));
+ synthesize_reljump((void *)regs->ip,
+ (void *)orig_ip + (regs->ip - copy_ip));
p->ainsn.boostable = 1;
} else {
p->ainsn.boostable = -1;
@@ -1040,6 +1076,346 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
+
+#ifdef CONFIG_OPTPROBES
+
+/* Insert a call instruction at address 'from', which calls address 'to'.*/
+static void __kprobes synthesize_relcall(void *from, void *to)
+{
+ __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
+}
+
+/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
+static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
+ unsigned long val)
+{
+#ifdef CONFIG_X86_64
+ *addr++ = 0x48;
+ *addr++ = 0xbf;
+#else
+ *addr++ = 0xb8;
+#endif
+ *(unsigned long *)addr = val;
+}
+
+void __kprobes kprobes_optinsn_template_holder(void)
+{
+ asm volatile (
+ ".global optprobe_template_entry\n"
+ "optprobe_template_entry: \n"
+#ifdef CONFIG_X86_64
+ /* We don't bother saving the ss register */
+ " pushq %rsp\n"
+ " pushfq\n"
+ SAVE_REGS_STRING
+ " movq %rsp, %rsi\n"
+ ".global optprobe_template_val\n"
+ "optprobe_template_val: \n"
+ ASM_NOP5
+ ASM_NOP5
+ ".global optprobe_template_call\n"
+ "optprobe_template_call: \n"
+ ASM_NOP5
+ /* Move flags to rsp */
+ " movq 144(%rsp), %rdx\n"
+ " movq %rdx, 152(%rsp)\n"
+ RESTORE_REGS_STRING
+ /* Skip flags entry */
+ " addq $8, %rsp\n"
+ " popfq\n"
+#else /* CONFIG_X86_32 */
+ " pushf\n"
+ SAVE_REGS_STRING
+ " movl %esp, %edx\n"
+ ".global optprobe_template_val\n"
+ "optprobe_template_val: \n"
+ ASM_NOP5
+ ".global optprobe_template_call\n"
+ "optprobe_template_call: \n"
+ ASM_NOP5
+ RESTORE_REGS_STRING
+ " addl $4, %esp\n" /* skip cs */
+ " popf\n"
+#endif
+ ".global optprobe_template_end\n"
+ "optprobe_template_end: \n");
+}
+
+#define TMPL_MOVE_IDX \
+ ((long)&optprobe_template_val - (long)&optprobe_template_entry)
+#define TMPL_CALL_IDX \
+ ((long)&optprobe_template_call - (long)&optprobe_template_entry)
+#define TMPL_END_IDX \
+ ((long)&optprobe_template_end - (long)&optprobe_template_entry)
+
+#define INT3_SIZE sizeof(kprobe_opcode_t)
+
+/* Optimized kprobe call back function: called from optinsn */
+static void __kprobes optimized_callback(struct optimized_kprobe *op,
+ struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ preempt_disable();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(&op->kp);
+ } else {
+ /* Save skipped registers */
+#ifdef CONFIG_X86_64
+ regs->cs = __KERNEL_CS;
+#else
+ regs->cs = __KERNEL_CS | get_kernel_rpl();
+ regs->gs = 0;
+#endif
+ regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+ regs->orig_ax = ~0UL;
+
+ __get_cpu_var(current_kprobe) = &op->kp;
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ opt_pre_handler(&op->kp, regs);
+ __get_cpu_var(current_kprobe) = NULL;
+ }
+ preempt_enable_no_resched();
+}
+
+static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
+{
+ int len = 0, ret;
+
+ while (len < RELATIVEJUMP_SIZE) {
+ ret = __copy_instruction(dest + len, src + len, 1);
+ if (!ret || !can_boost(dest + len))
+ return -EINVAL;
+ len += ret;
+ }
+
+ return len;
+}
+
+/* Check whether insn is indirect jump */
+static int __kprobes insn_is_indirect_jump(struct insn *insn)
+{
+ return (insn->opcode.bytes[0] == 0xff ||
+ insn->opcode.bytes[0] == 0xea);
+}
+
+/* Check whether insn jumps into specified address range */
+static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
+{
+ unsigned long target = 0;
+
+ switch (insn->opcode.bytes[0]) {
+ case 0xe0: /* loopne */
+ case 0xe1: /* loope */
+ case 0xe2: /* loop */
+ case 0xe3: /* jcxz */
+ case 0xe9: /* near relative jump */
+ case 0xeb: /* short relative jump */
+ break;
+ case 0x0f:
+ if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
+ break;
+ return 0;
+ default:
+ if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
+ break;
+ return 0;
+ }
+ target = (unsigned long)insn->next_byte + insn->immediate.value;
+
+ return (start <= target && target <= start + len);
+}
+
+/* Decode whole function to ensure any instructions don't jump into target */
+static int __kprobes can_optimize(unsigned long paddr)
+{
+ int ret;
+ unsigned long addr, size = 0, offset = 0;
+ struct insn insn;
+ kprobe_opcode_t buf[MAX_INSN_SIZE];
+ /* Dummy buffers for lookup_symbol_attrs */
+ static char __dummy_buf[KSYM_NAME_LEN];
+
+ /* Lookup symbol including addr */
+ if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf))
+ return 0;
+
+ /* Check there is enough space for a relative jump. */
+ if (size - offset < RELATIVEJUMP_SIZE)
+ return 0;
+
+ /* Decode instructions */
+ addr = paddr - offset;
+ while (addr < paddr - offset + size) { /* Decode until function end */
+ if (search_exception_tables(addr))
+ /*
+ * Since some fixup code will jumps into this function,
+ * we can't optimize kprobe in this function.
+ */
+ return 0;
+ kernel_insn_init(&insn, (void *)addr);
+ insn_get_opcode(&insn);
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+ ret = recover_probed_instruction(buf, addr);
+ if (ret)
+ return 0;
+ kernel_insn_init(&insn, buf);
+ }
+ insn_get_length(&insn);
+ /* Recover address */
+ insn.kaddr = (void *)addr;
+ insn.next_byte = (void *)(addr + insn.length);
+ /* Check any instructions don't jump into target */
+ if (insn_is_indirect_jump(&insn) ||
+ insn_jump_into_range(&insn, paddr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE))
+ return 0;
+ addr += insn.length;
+ }
+
+ return 1;
+}
+
+/* Check optimized_kprobe can actually be optimized. */
+int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+ int i;
+
+ for (i = 1; i < op->optinsn.size; i++)
+ if (get_kprobe(op->kp.addr + i))
+ return -EEXIST;
+
+ return 0;
+}
+
+/* Check the addr is within the optimized instructions. */
+int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
+ unsigned long addr)
+{
+ return ((unsigned long)op->kp.addr <= addr &&
+ (unsigned long)op->kp.addr + op->optinsn.size > addr);
+}
+
+/* Free optimized instruction slot */
+static __kprobes
+void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+ if (op->optinsn.insn) {
+ free_optinsn_slot(op->optinsn.insn, dirty);
+ op->optinsn.insn = NULL;
+ op->optinsn.size = 0;
+ }
+}
+
+void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+ __arch_remove_optimized_kprobe(op, 1);
+}
+
+/*
+ * Copy replacing target instructions
+ * Target instructions MUST be relocatable (checked inside)
+ */
+int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+{
+ u8 *buf;
+ int ret;
+
+ if (!can_optimize((unsigned long)op->kp.addr))
+ return -EILSEQ;
+
+ op->optinsn.insn = get_optinsn_slot();
+ if (!op->optinsn.insn)
+ return -ENOMEM;
+
+ buf = (u8 *)op->optinsn.insn;
+
+ /* Copy instructions into the out-of-line buffer */
+ ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
+ if (ret < 0) {
+ __arch_remove_optimized_kprobe(op, 0);
+ return ret;
+ }
+ op->optinsn.size = ret;
+
+ /* Backup instructions which will be replaced by jump address */
+ memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE);
+
+ /* Copy arch-dep-instance from template */
+ memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
+
+ /* Set probe information */
+ synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
+
+ /* Set probe function call */
+ synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
+
+ /* Set returning jmp instruction at the tail of out-of-line buffer */
+ synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
+ (u8 *)op->kp.addr + op->optinsn.size);
+
+ flush_icache_range((unsigned long) buf,
+ (unsigned long) buf + TMPL_END_IDX +
+ op->optinsn.size + RELATIVEJUMP_SIZE);
+ return 0;
+}
+
+/* Replace a breakpoint (int3) with a relative jump. */
+int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+{
+ unsigned char jmp_code[RELATIVEJUMP_SIZE];
+ s32 rel = (s32)((long)op->optinsn.insn -
+ ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
+ /* Check if the address gap is in 2GB range. */
+ if ((long)op->kp.addr + RELATIVEJUMP_SIZE + rel !=
+ (long)op->optinsn.insn)
+ return -EINVAL;
+
+ jmp_code[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&jmp_code[1]) = rel;
+
+ /*
+ * text_poke_smp doesn't support NMI/MCE code modifying.
+ * However, since kprobes itself also doesn't support NMI/MCE
+ * code probing, it's not a problem.
+ */
+ text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
+ return 0;
+}
+
+/* Replace a relative jump with a breakpoint (int3). */
+void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+ u8 buf[RELATIVEJUMP_SIZE];
+
+ /* Set int3 to first byte for kprobes */
+ buf[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+ text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
+}
+
+static int __kprobes setup_detour_execution(struct kprobe *p,
+ struct pt_regs *regs,
+ int reenter)
+{
+ struct optimized_kprobe *op;
+
+ if (p->flags & KPROBE_FLAG_OPTIMIZED) {
+ /* This kprobe is really able to run optimized path. */
+ op = container_of(p, struct optimized_kprobe, kp);
+ /* Detour through copied instructions */
+ regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
+ if (!reenter)
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1;
+ }
+ return 0;
+}
+#endif
+
int __init arch_init_kprobes(void)
{
return 0;
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: mhi...@re...
|
|
From: Masami H. <mhi...@re...> - 2009-12-18 18:12:49
|
Add text_poke_fixup(), which takes a fixup address to which a processor
jumps if it hits the address being modified while the code is being modified.
text_poke_fixup() performs the following steps for this purpose.
1. Setup int3 handler for fixup.
2. Put a breakpoint (int3) on the first byte of modifying region,
and synchronize code on all CPUs.
3. Modify other bytes of modifying region, and synchronize code on all CPUs.
4. Modify the first byte of modifying region, and synchronize code
on all CPUs.
5. Clear int3 handler.
Thus, if some other processor executes the address being modified during
steps 2 to 4, it will jump to the fixup code.
This still has many limitations for modifying multiple instructions at once.
However, it is enough for patching that replaces a 5-byte nop with a jump,
because:
- The replaced instruction is just one instruction, which is executed atomically.
- The replacing instruction is a jump, so we can set the fixup address to where
the jump goes.
I think text_poke_fixup() can be shared with the stop_machine()-free version of
Mathieu's immediate values, since the basic ideas are the same.
Changes in v6:
- Use int3 even if len == 1 (int3 size).
Changes in v5
- Add some comments.
- Use smp_wmb()/smp_rmb()
- Remove unneeded sync_core_all()
Signed-off-by: Masami Hiramatsu <mhi...@re...>
Cc: Ananth N Mavinakayanahalli <an...@in...>
Cc: Ingo Molnar <mi...@el...>
Cc: Jim Keniston <jke...@us...>
Cc: Srikar Dronamraju <sr...@li...>
Cc: Christoph Hellwig <hc...@in...>
Cc: Steven Rostedt <ro...@go...>
Cc: Frederic Weisbecker <fwe...@gm...>
Cc: H. Peter Anvin <hp...@zy...>
Cc: Anders Kaseorg <an...@ks...>
Cc: Tim Abbott <ta...@ks...>
Cc: Andi Kleen <an...@fi...>
Cc: Jason Baron <jb...@re...>
Cc: Mathieu Desnoyers <co...@kr...>
---
arch/x86/include/asm/alternative.h | 11 ++++
arch/x86/kernel/alternative.c | 102 ++++++++++++++++++++++++++++++++++++
kernel/kprobes.c | 2 -
3 files changed, 114 insertions(+), 1 deletions(-)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 812ee2c..fb1913d 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -163,4 +163,15 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
+/*
+ * Setup int3 trap and fixup execution for cross-modifying on SMP case.
+ * If the other cpus execute modifying instruction, it will hit int3
+ * and go to fixup code. This just provides a minimal safety check.
+ * Additional checks/restrictions are required for completely safe
+ * cross-modifying.
+ */
+extern void *text_poke_fixup(void *addr, const void *opcode, size_t len,
+ void *fixup);
+extern void sync_core_all(void);
+
#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 7ce45d7..3117142 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <linux/stringify.h>
#include <linux/kprobes.h>
+#include <linux/kdebug.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
@@ -612,3 +613,104 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
return addr;
}
+/*
+ * On pentium series, Unsynchronized cross-modifying code
+ * operations can cause unexpected instruction execution results.
+ * So after code modified, we should synchronize it on each processor.
+ */
+static void __kprobes __local_sync_core(void *info)
+{
+ sync_core();
+}
+
+void __kprobes sync_core_all(void)
+{
+ on_each_cpu(__local_sync_core, NULL, 1);
+}
+
+/* Safely cross-code modifying with fixup address */
+static void *patch_fixup_from;
+static void *patch_fixup_addr;
+
+static int __kprobes patch_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = data;
+ struct pt_regs *regs = args->regs;
+
+ smp_rmb();
+
+ if (likely(!patch_fixup_from))
+ return NOTIFY_DONE;
+
+ if (val != DIE_INT3 || !regs || user_mode_vm(regs) ||
+ (unsigned long)patch_fixup_from != regs->ip)
+ return NOTIFY_DONE;
+
+ args->regs->ip = (unsigned long)patch_fixup_addr;
+
+ return NOTIFY_STOP;
+}
+
+/**
+ * text_poke_fixup() -- cross-modifying kernel text with fixup address.
+ * @addr: Modifying address.
+ * @opcode: New instruction.
+ * @len: length of modifying bytes.
+ * @fixup: Fixup address.
+ *
+ * Note: You must backup replaced instructions before calling this,
+ * if you need to recover it.
+ * Note: Must be called under text_mutex.
+ */
+void *__kprobes text_poke_fixup(void *addr, const void *opcode, size_t len,
+ void *fixup)
+{
+ static const unsigned char int3_insn = BREAKPOINT_INSTRUCTION;
+ static const int int3_size = sizeof(int3_insn);
+
+ /* Preparing fixup address */
+ patch_fixup_addr = fixup;
+ patch_fixup_from = (u8 *)addr + int3_size; /* IP address after int3 */
+ smp_wmb();
+
+ /* Cap by an int3 - expecting synchronously done */
+ text_poke(addr, &int3_insn, int3_size);
+
+ if (len - int3_size > 0) {
+ /* Replace tail bytes */
+ text_poke((char *)addr + int3_size,
+ (const char *)opcode + int3_size,
+ len - int3_size);
+ /* Synchronize code cache */
+ sync_core_all();
+ }
+
+ /* Replace int3 with head byte - expecting synchronously done */
+ text_poke(addr, opcode, int3_size);
+
+ /*
+ * Sync core again - this is for waiting for disabled IRQ code
+ * quiescent state, IOW, waiting for all running int3 fixup
+ * handlers.
+ */
+ sync_core_all();
+
+ /* Cleanup fixup address */
+ patch_fixup_from = NULL;
+ smp_wmb();
+
+ return addr;
+}
+
+static struct notifier_block patch_exceptions_nb = {
+ .notifier_call = patch_exceptions_notify,
+ .priority = 0x7fffffff /* we need to be notified first */
+};
+
+static int __init patch_init(void)
+{
+ return register_die_notifier(&patch_exceptions_nb);
+}
+
+arch_initcall(patch_init);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 079e116..a16fb03 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1334,7 +1334,7 @@ EXPORT_SYMBOL_GPL(unregister_kprobes);
static struct notifier_block kprobe_exceptions_nb = {
.notifier_call = kprobe_exceptions_notify,
- .priority = 0x7fffffff /* we need to be notified first */
+ .priority = 0x7ffffff0 /* High priority, but not first. */
};
unsigned long __weak arch_deref_entry_point(void *entry)
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: mhi...@re...
|
|
From: Masami H. <mhi...@re...> - 2009-12-18 18:12:41
|
Add documentations about kprobe jump optimization to Documentation/kprobes.txt. Signed-off-by: Masami Hiramatsu <mhi...@re...> Cc: Ananth N Mavinakayanahalli <an...@in...> Cc: Ingo Molnar <mi...@el...> Cc: Jim Keniston <jke...@us...> Cc: Srikar Dronamraju <sr...@li...> Cc: Christoph Hellwig <hc...@in...> Cc: Steven Rostedt <ro...@go...> Cc: Frederic Weisbecker <fwe...@gm...> Cc: H. Peter Anvin <hp...@zy...> Cc: Anders Kaseorg <an...@ks...> Cc: Tim Abbott <ta...@ks...> Cc: Andi Kleen <an...@fi...> Cc: Jason Baron <jb...@re...> Cc: Mathieu Desnoyers <co...@kr...> --- Documentation/kprobes.txt | 192 ++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 179 insertions(+), 13 deletions(-) diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 053037a..e4b0504 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -1,6 +1,7 @@ Title : Kernel Probes (Kprobes) Authors : Jim Keniston <jke...@us...> : Prasanna S Panchamukhi <pra...@in...> + : Masami Hiramatsu <mhi...@re...> CONTENTS @@ -14,6 +15,7 @@ CONTENTS 8. Kprobes Example 9. Jprobes Example 10. Kretprobes Example +11. Optimization Example Appendix A: The kprobes debugfs interface 1. Concepts: Kprobes, Jprobes, Return Probes @@ -42,13 +44,13 @@ registration/unregistration of a group of *probes. These functions can speed up unregistration process when you have to unregister a lot of probes at once. -The next three subsections explain how the different types of -probes work. They explain certain things that you'll need to -know in order to make the best use of Kprobes -- e.g., the -difference between a pre_handler and a post_handler, and how -to use the maxactive and nmissed fields of a kretprobe. But -if you're in a hurry to start using Kprobes, you can skip ahead -to section 2. +The next four subsections explain how the different types of +probes work and how the optimization works. 
They explain certain +things that you'll need to know in order to make the best use of +Kprobes -- e.g., the difference between a pre_handler and +a post_handler, and how to use the maxactive and nmissed fields of +a kretprobe. But if you're in a hurry to start using Kprobes, you +can skip ahead to section 2. 1.1 How Does a Kprobe Work? @@ -161,13 +163,110 @@ In case probed function is entered but there is no kretprobe_instance object available, then in addition to incrementing the nmissed count, the user entry_handler invocation is also skipped. +1.4 How Does the Optimization Work? + + If you configured kernel with CONFIG_OPTPROBES=y (currently this option is +supported on x86/x86-64, non-preemptive kernel) and +"debug.kprobes_optimization" sysctl sets 1, kprobes tries to use a +jump instruction instead of breakpoint instruction automatically. + +1.4.1 Init a Kprobe + + Before preparing optimization, Kprobes inserts original(user-defined) +kprobe on the specified address. So, even if the kprobe is not +possible to be optimized, it just uses a normal kprobe. + +1.4.2 Safety check + + First, Kprobes gets the address of probed function and checks whether the +optimized region, which will be replaced by a jump instruction, does NOT +straddle the function boundary, because if the optimized region reaches the +next function, its caller causes unexpected results. + Next, Kprobes decodes whole body of probed function and checks there is +NO indirect jump, NO instruction which will cause exception by checking +exception_tables (this will jump to fixup code and fixup code jumps into +same function body) and NO near jump which jumps into the optimized region +(except the 1st byte of jump), because if some jump instruction jumps +into the middle of another instruction, it causes unexpected results too. 
+ Kprobes also measures the length of instructions which will be replaced +by a jump instruction, because a jump instruction is longer than 1 byte, +it may replaces multiple instructions, and it checks whether those +instructions can be executed out-of-line. + +1.4.3 Preparing detour buffer + + Then, Kprobes prepares "detour" buffer, which contains exception emulating +code (push/pop registers, call handler), copied instructions(Kprobes copies +instructions which will be replaced by a jump, to the detour buffer), and +a jump which jumps back to the original execution path. + +1.4.4 Pre-optimization + + After preparing detour buffer, Kprobes checks that the probe is *NOT* in +the below cases; + - The probe has either break_handler or post_handler. + - Other probes are probing the instructions which will be replaced by + a jump instruction. + - The probe is disabled. +In above cases, Kprobes just doesn't start optimizating the probe. + + If the kprobe can be optimized, Kprobes enqueues the kprobe to optimizing +list and kicks kprobe-optimizer workqueue to optimize it. To wait other +optimized probes, kprobe-optimizer will delay to work. + When the optimized-kprobe is hit before optimization, its handler changes +IP(instruction pointer) to copied code and exits. So, the instructions which +were copied to detour buffer are executed on the detour buffer. + +1.4.5 Optimization + + Kprobe-optimizer doesn't start instruction-replacing soon, it waits +synchronize_sched for safety, because some processors are possible to be +interrupted on the instructions which will be replaced by a jump instruction. +As you know, synchronize_sched() can ensure that all interruptions which were +executed when synchronize_sched() was called are done, only if +CONFIG_PREEMPT=n. 
So, this version supports only the kernel with +CONFIG_PREEMPT=n.(*) + After that, kprobe-optimizer replaces the 4 bytes right after int3 +breakpoint with relative-jump destination, and synchronize caches on all +processors. And then, it replaces int3 with relative-jump opcode, and +synchronize caches again. + + After optimizing the probe, a CPU hits the jump instruction and jumps to +the out-of-line buffer directly. Thus the breakpoint exception is skipped. + +1.4.6 Unoptimization + + When unregistering, disabling kprobe or being blocked by other kprobe, +an optimized-kprobe will be unoptimized. Before kprobe-optimizer runs, +the kprobe just be dequeued from the optimized list. When the optimization +has been done, it replaces a jump with int3 breakpoint and original code. + First it puts int3 at the first byte of the jump, synchronize caches +on all processors, replaces the 4 bytes right after int3 with the original +code and synchronize caches again. + +(*)This optimization-safety checking may be replaced with stop-machine method + which ksplice is done for supporting CONFIG_PREEMPT=y kernel. + +NOTE for geeks: +The jump optimization changes the kprobe's pre_handler behavior. +Without optimization, pre_handler can change kernel execution path by +changing regs->ip and return 1. However, after optimizing the probe, +that modification is ignored. Thus, if you'd like to tweak kernel +execution path, you need to avoid optimization. In that case, you can +choose either, + - Set empty function to post_handler or break_handler. + or + - Config CONFIG_OPTPROBES=n. + or + - Execute 'sysctl -w debug.kprobes_optimization=n' + 2. Architectures Supported Kprobes, jprobes, and return probes are implemented on the following architectures: -- i386 -- x86_64 (AMD-64, EM64T) +- i386 (Supports jump optimization) +- x86_64 (AMD-64, EM64T) (Supports jump optimization) - ppc64 - ia64 (Does not support probes on instruction slot1.) - sparc64 (Return probes not yet implemented.) 
@@ -193,6 +292,10 @@ it useful to "Compile the kernel with debug info" (CONFIG_DEBUG_INFO), so you can use "objdump -d -l vmlinux" to see the source-to-object code mapping. +If you want to reduce probing overhead, set "Kprobes jump optimization +support" (CONFIG_OPTPROBES) to "y". You can find this option under +"Kprobes" line. + 4. API Reference The Kprobes API includes a "register" function and an "unregister" @@ -387,9 +490,12 @@ the probe which has been registered. 5. Kprobes Features and Limitations -Kprobes allows multiple probes at the same address. Currently, -however, there cannot be multiple jprobes on the same function at -the same time. +Kprobes allows multiple probes at the same address even if it is optimized. +Currently, however, there cannot be multiple jprobes on the same function +at the same time. And also, optimized kprobes can not invoke the +post_handler and the break_handler. So if you attempt to install the probe +which has the the post_handler or the break_handler at the same address of +an optimized kprobe, the probe will be unoptimized automatically. In general, you can install a probe anywhere in the kernel. In particular, you can probe interrupt handlers. Known exceptions @@ -453,6 +559,37 @@ reason, Kprobes doesn't support return probes (or kprobes or jprobes) on the x86_64 version of __switch_to(); the registration functions return -EINVAL. +On x86/x86-64, since the Jump Optimization of Kprobes modifies instructions +widely, there are some limitations for optimization. To explain it, +we introduce some terminology. Image certain binary line which is +constructed by 2 byte instruction, 2byte instruction and 3byte instruction. 
+ + IA + | +[-2][-1][0][1][2][3][4][5][6][7] + [ins1][ins2][ ins3 ] + [<- DCR ->] + [<- JTPR ->] + +ins1: 1st Instruction +ins2: 2nd Instruction +ins3: 3rd Instruction +IA: Insertion Address +JTPR: Jump Target Prohibition Region +DCR: Detoured Code Region + +The instructions in DCR are copied to the out-of-line buffer +of the djprobe instance, because the bytes in JTPR are replaced by +a jump instruction. So, there are several limitations. + +a) The instructions in DCR must be relocatable. +b) The instructions in DCR must not include call instruction. +c) JTPR must not be targeted by any jump or call instruction. +d) DCR must not straddle the border betweeen functions. + +Anyway, these limitations are checked by in-kernel instruction decoder, +so you don't need to care about that. + 6. Probe Overhead On a typical CPU in use in 2005, a kprobe hit takes 0.5 to 1.0 @@ -476,6 +613,19 @@ k = 0.49 usec; j = 0.76; r = 0.80; kr = 0.82; jr = 1.07 ppc64: POWER5 (gr), 1656 MHz (SMT disabled, 1 virtual CPU per physical CPU) k = 0.77 usec; j = 1.31; r = 1.26; kr = 1.45; jr = 1.99 +6.1 Optimized Probe Overhead + +Typically, an optimized kprobe hit takes 0.07 to 0.1 microseconds to +process. Here are sample overhead figures (in usec) for x86-64 architectures. +k = unoptimized kprobe, b = boosted(single-step skipped), o = optimized kprobe, +r = unoptimized kretprobe, rb = boosted kretprobe, ro = optimized kretprobe. + +i386: Intel(R) Xeon(R) E5410, 2.33GHz, 4656.90 bogomips +k = 0.68 usec; b = 0.27; o = 0.06; r = 0.95; rb = 0.53; ro = 0.30 + +x86-64: Intel(R) Xeon(R) E5410, 2.33GHz, 4656.90 bogomips +k = 0.91 usec; b = 0.40; o = 0.06; r = 1.21; rb = 0.71; ro = 0.35 + 7. TODO a. SystemTap (http://sourceware.org/systemtap): Provides a simplified @@ -523,7 +673,8 @@ is also specified. Following columns show probe status. 
If the probe is on a virtual address that is no longer valid (module init sections, module virtual addresses that correspond to modules that've been unloaded), such probes are marked with [GONE]. If the probe is temporarily disabled, -such probes are marked with [DISABLED]. +such probes are marked with [DISABLED]. If the probe is optimized, it is +marked with [OPTIMIZED]. /sys/kernel/debug/kprobes/enabled: Turn kprobes ON/OFF forcibly. @@ -533,3 +684,18 @@ registered probes will be disarmed, till such time a "1" is echoed to this file. Note that this knob just disarms and arms all kprobes and doesn't change each probe's disabling state. This means that disabled kprobes (marked [DISABLED]) will be not enabled if you turn ON all kprobes by this knob. + + +Appendix B: The kprobes sysctl interface + +/proc/sys/debug/kprobes-optimization: Turn kprobes optimization ON/OFF. + +When CONFIG_OPTPROBES=y, this sysctl interface appears and it provides a knob +to globally and forcibly turn the jump optimization ON or OFF. By default, +jump optimization is allowed(ON). By echoing "0" to this file or By setting +0 to "debug.kprobes_optimization" via sysctl, all optimized probes will be +unoptimized. And new probes registered after that will not be optimized. +Note that this knob *Changes* the optimized state. This means that optimized +probes (marked [OPTIMIZED]) will be unoptimized ([OPTIMIZED] tag will be +removed). And after the knob is turned on, it will be optimized again. + -- Masami Hiramatsu Software Engineer Hitachi Computer Products (America), Inc. Software Solutions Division e-mail: mhi...@re... |
|
From: Masami H. <mhi...@re...> - 2009-12-18 18:12:13
|
Add a generic text_poke_smp() for SMP, which uses stop_machine()
to synchronize code modification.
This stop_machine() method is officially described in section "7.1.3
Handling Self- and Cross-Modifying Code" of Intel's
Software Developer's Manual, Volume 3A.
Since stop_machine() can't protect code against NMI/MCE, this
function cannot modify those handlers. Also, this function
is basically for modifying a single multi-byte instruction. For
modifying multiple multi-byte instructions, we need another special
trap & detour code.
This code originally comes from the stop_machine() version of immediate
values. Thanks Mathieu!
Signed-off-by: Masami Hiramatsu <mhi...@re...>
Cc: Mathieu Desnoyers <co...@kr...>
Cc: Ananth N Mavinakayanahalli <an...@in...>
Cc: Ingo Molnar <mi...@el...>
Cc: Jim Keniston <jke...@us...>
Cc: Srikar Dronamraju <sr...@li...>
Cc: Christoph Hellwig <hc...@in...>
Cc: Steven Rostedt <ro...@go...>
Cc: Frederic Weisbecker <fwe...@gm...>
Cc: H. Peter Anvin <hp...@zy...>
Cc: Anders Kaseorg <an...@ks...>
Cc: Tim Abbott <ta...@ks...>
Cc: Andi Kleen <an...@fi...>
Cc: Jason Baron <jb...@re...>
---
arch/x86/include/asm/alternative.h | 4 ++
arch/x86/kernel/alternative.c | 60 ++++++++++++++++++++++++++++++++++++
2 files changed, 63 insertions(+), 1 deletions(-)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 69b74a7..812ee2c 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -155,10 +155,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
* invalid instruction possible) or if the instructions are changed from a
* consistent state to another consistent state atomically.
* More care must be taken when modifying code in the SMP case because of
- * Intel's errata.
+ * Intel's errata. text_poke_smp() takes care that errata, but still
+ * doesn't support NMI/MCE handler code modifying.
* On the local CPU you need to be protected again NMI or MCE handlers seeing an
* inconsistent instruction while you patch.
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index de7353c..7ce45d7 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
+#include <linux/stop_machine.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
@@ -552,3 +553,62 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
local_irq_restore(flags);
return addr;
}
+
+/*
+ * Cross-modifying kernel text with stop_machine().
+ * This code originally comes from immediate value.
+ */
+static atomic_t stop_machine_first;
+static int wrote_text;
+
+struct text_poke_params {
+ void *addr;
+ const void *opcode;
+ size_t len;
+};
+
+static int __kprobes stop_machine_text_poke(void *data)
+{
+ struct text_poke_params *tpp = data;
+
+ if (atomic_dec_and_test(&stop_machine_first)) {
+ text_poke(tpp->addr, tpp->opcode, tpp->len);
+ smp_wmb(); /* Make sure other cpus see that this has run */
+ wrote_text = 1;
+ } else {
+ while (!wrote_text)
+ smp_rmb();
+ sync_core();
+ }
+
+ flush_icache_range((unsigned long)tpp->addr,
+ (unsigned long)tpp->addr + tpp->len);
+ return 0;
+}
+
+/**
+ * text_poke_smp - Update instructions on a live kernel on SMP
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
+ * Modify multi-byte instruction by using stop_machine() on SMP. This allows
+ * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
+ * should be allowed, since stop_machine() does _not_ protect code against
+ * NMI and MCE.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
+{
+ struct text_poke_params tpp;
+
+ tpp.addr = addr;
+ tpp.opcode = opcode;
+ tpp.len = len;
+ atomic_set(&stop_machine_first, 1);
+ wrote_text = 0;
+ stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+ return addr;
+}
+
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: mhi...@re...
|
|
From: Masami H. <mhi...@re...> - 2009-12-18 18:12:07
|
Integrate prepare_singlestep() into setup_singlestep() so that reentered
probes can also be boosted, if possible.
Signed-off-by: Masami Hiramatsu <mhi...@re...>
Cc: Ananth N Mavinakayanahalli <an...@in...>
Cc: Ingo Molnar <mi...@el...>
Cc: Jim Keniston <jke...@us...>
Cc: Srikar Dronamraju <sr...@li...>
Cc: Christoph Hellwig <hc...@in...>
Cc: Steven Rostedt <ro...@go...>
Cc: Frederic Weisbecker <fwe...@gm...>
Cc: H. Peter Anvin <hp...@zy...>
Cc: Anders Kaseorg <an...@ks...>
Cc: Tim Abbott <ta...@ks...>
Cc: Andi Kleen <an...@fi...>
Cc: Jason Baron <jb...@re...>
Cc: Mathieu Desnoyers <co...@kr...>
---
arch/x86/kernel/kprobes.c | 48 ++++++++++++++++++++++++---------------------
1 files changed, 26 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7039e6e..57de6ca 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -403,18 +403,6 @@ static void __kprobes restore_btf(void)
update_debugctlmsr(current->thread.debugctlmsr);
}
-static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
- clear_btf();
- regs->flags |= X86_EFLAGS_TF;
- regs->flags &= ~X86_EFLAGS_IF;
- /* single step inline if the instruction is an int3 */
- if (p->opcode == BREAKPOINT_INSTRUCTION)
- regs->ip = (unsigned long)p->addr;
- else
- regs->ip = (unsigned long)p->ainsn.insn;
-}
-
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -427,19 +415,38 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
}
static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
+ struct kprobe_ctlblk *kcb, int reenter)
{
#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER)
if (p->ainsn.boostable == 1 && !p->post_handler) {
/* Boost up -- we can execute copied instructions directly */
- reset_current_kprobe();
+ if (!reenter)
+ reset_current_kprobe();
+ /*
+ * Reentering boosted probe doesn't reset current_kprobe,
+ * nor set current_kprobe, because it doesn't use single
+ * stepping.
+ */
regs->ip = (unsigned long)p->ainsn.insn;
preempt_enable_no_resched();
return;
}
#endif
- prepare_singlestep(p, regs);
- kcb->kprobe_status = KPROBE_HIT_SS;
+ if (reenter) {
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_status = KPROBE_REENTER;
+ } else
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ /* Prepare real single stepping */
+ clear_btf();
+ regs->flags |= X86_EFLAGS_TF;
+ regs->flags &= ~X86_EFLAGS_IF;
+ /* single step inline if the instruction is an int3 */
+ if (p->opcode == BREAKPOINT_INSTRUCTION)
+ regs->ip = (unsigned long)p->addr;
+ else
+ regs->ip = (unsigned long)p->ainsn.insn;
}
/*
@@ -453,11 +460,8 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
case KPROBE_HIT_ACTIVE:
- save_previous_kprobe(kcb);
- set_current_kprobe(p, regs, kcb);
kprobes_inc_nmissed_count(p);
- prepare_singlestep(p, regs);
- kcb->kprobe_status = KPROBE_REENTER;
+ setup_singlestep(p, regs, kcb, 1);
break;
case KPROBE_HIT_SS:
/* A probe has been hit in the codepath leading up to, or just
@@ -532,13 +536,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
* more here.
*/
if (!p->pre_handler || !p->pre_handler(p, regs))
- setup_singlestep(p, regs, kcb);
+ setup_singlestep(p, regs, kcb, 0);
return 1;
}
} else if (kprobe_running()) {
p = __get_cpu_var(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
- setup_singlestep(p, regs, kcb);
+ setup_singlestep(p, regs, kcb, 0);
return 1;
}
} /* else: not a kprobe fault; let the kernel handle it */
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: mhi...@re...
|
|
From: Masami H. <mhi...@re...> - 2009-12-18 18:12:01
|
Introduce SAVE/RESOTRE_REGS_STRING for cleanup kretprobe-trampoline asm code. These macros will be used for emulating interruption. Signed-off-by: Masami Hiramatsu <mhi...@re...> Cc: Ananth N Mavinakayanahalli <an...@in...> Cc: Ingo Molnar <mi...@el...> Cc: Jim Keniston <jke...@us...> Cc: Srikar Dronamraju <sr...@li...> Cc: Christoph Hellwig <hc...@in...> Cc: Steven Rostedt <ro...@go...> Cc: Frederic Weisbecker <fwe...@gm...> Cc: H. Peter Anvin <hp...@zy...> Cc: Anders Kaseorg <an...@ks...> Cc: Tim Abbott <ta...@ks...> Cc: Andi Kleen <an...@fi...> Cc: Jason Baron <jb...@re...> Cc: Mathieu Desnoyers <co...@kr...> --- arch/x86/kernel/kprobes.c | 128 ++++++++++++++++++++++++--------------------- 1 files changed, 67 insertions(+), 61 deletions(-) diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 57de6ca..f848984 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -551,6 +551,69 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) return 0; } +#ifdef CONFIG_X86_64 +#define SAVE_REGS_STRING \ + /* Skip cs, ip, orig_ax. */ \ + " subq $24, %rsp\n" \ + " pushq %rdi\n" \ + " pushq %rsi\n" \ + " pushq %rdx\n" \ + " pushq %rcx\n" \ + " pushq %rax\n" \ + " pushq %r8\n" \ + " pushq %r9\n" \ + " pushq %r10\n" \ + " pushq %r11\n" \ + " pushq %rbx\n" \ + " pushq %rbp\n" \ + " pushq %r12\n" \ + " pushq %r13\n" \ + " pushq %r14\n" \ + " pushq %r15\n" +#define RESTORE_REGS_STRING \ + " popq %r15\n" \ + " popq %r14\n" \ + " popq %r13\n" \ + " popq %r12\n" \ + " popq %rbp\n" \ + " popq %rbx\n" \ + " popq %r11\n" \ + " popq %r10\n" \ + " popq %r9\n" \ + " popq %r8\n" \ + " popq %rax\n" \ + " popq %rcx\n" \ + " popq %rdx\n" \ + " popq %rsi\n" \ + " popq %rdi\n" \ + /* Skip orig_ax, ip, cs */ \ + " addq $24, %rsp\n" +#else +#define SAVE_REGS_STRING \ + /* Skip cs, ip, orig_ax and gs. 
*/ \ + " subl $16, %esp\n" \ + " pushl %fs\n" \ + " pushl %ds\n" \ + " pushl %es\n" \ + " pushl %eax\n" \ + " pushl %ebp\n" \ + " pushl %edi\n" \ + " pushl %esi\n" \ + " pushl %edx\n" \ + " pushl %ecx\n" \ + " pushl %ebx\n" +#define RESTORE_REGS_STRING \ + " popl %ebx\n" \ + " popl %ecx\n" \ + " popl %edx\n" \ + " popl %esi\n" \ + " popl %edi\n" \ + " popl %ebp\n" \ + " popl %eax\n" \ + /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ + " addl $24, %esp\n" +#endif + /* * When a retprobed function returns, this code saves registers and * calls trampoline_handler() runs, which calls the kretprobe's handler. @@ -564,65 +627,16 @@ static void __used __kprobes kretprobe_trampoline_holder(void) /* We don't bother saving the ss register */ " pushq %rsp\n" " pushfq\n" - /* - * Skip cs, ip, orig_ax. - * trampoline_handler() will plug in these values - */ - " subq $24, %rsp\n" - " pushq %rdi\n" - " pushq %rsi\n" - " pushq %rdx\n" - " pushq %rcx\n" - " pushq %rax\n" - " pushq %r8\n" - " pushq %r9\n" - " pushq %r10\n" - " pushq %r11\n" - " pushq %rbx\n" - " pushq %rbp\n" - " pushq %r12\n" - " pushq %r13\n" - " pushq %r14\n" - " pushq %r15\n" + SAVE_REGS_STRING " movq %rsp, %rdi\n" " call trampoline_handler\n" /* Replace saved sp with true return address. */ " movq %rax, 152(%rsp)\n" - " popq %r15\n" - " popq %r14\n" - " popq %r13\n" - " popq %r12\n" - " popq %rbp\n" - " popq %rbx\n" - " popq %r11\n" - " popq %r10\n" - " popq %r9\n" - " popq %r8\n" - " popq %rax\n" - " popq %rcx\n" - " popq %rdx\n" - " popq %rsi\n" - " popq %rdi\n" - /* Skip orig_ax, ip, cs */ - " addq $24, %rsp\n" + RESTORE_REGS_STRING " popfq\n" #else " pushf\n" - /* - * Skip cs, ip, orig_ax and gs. 
- * trampoline_handler() will plug in these values - */ - " subl $16, %esp\n" - " pushl %fs\n" - " pushl %es\n" - " pushl %ds\n" - " pushl %eax\n" - " pushl %ebp\n" - " pushl %edi\n" - " pushl %esi\n" - " pushl %edx\n" - " pushl %ecx\n" - " pushl %ebx\n" + SAVE_REGS_STRING " movl %esp, %eax\n" " call trampoline_handler\n" /* Move flags to cs */ @@ -630,15 +644,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void) " movl %edx, 52(%esp)\n" /* Replace saved flags with true return address. */ " movl %eax, 56(%esp)\n" - " popl %ebx\n" - " popl %ecx\n" - " popl %edx\n" - " popl %esi\n" - " popl %edi\n" - " popl %ebp\n" - " popl %eax\n" - /* Skip ds, es, fs, gs, orig_ax and ip */ - " addl $24, %esp\n" + RESTORE_REGS_STRING " popf\n" #endif " ret\n"); -- Masami Hiramatsu Software Engineer Hitachi Computer Products (America), Inc. Software Solutions Division e-mail: mhi...@re... |