You can subscribe to this list here.
| 2009 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(32) |
Jun
(66) |
Jul
(102) |
Aug
(78) |
Sep
(106) |
Oct
(137) |
Nov
(147) |
Dec
(147) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2010 |
Jan
(71) |
Feb
(139) |
Mar
(86) |
Apr
(76) |
May
(57) |
Jun
(10) |
Jul
(12) |
Aug
(6) |
Sep
(8) |
Oct
(12) |
Nov
(12) |
Dec
(18) |
| 2011 |
Jan
(16) |
Feb
(19) |
Mar
(3) |
Apr
(1) |
May
(16) |
Jun
(17) |
Jul
(74) |
Aug
(22) |
Sep
(18) |
Oct
(24) |
Nov
(21) |
Dec
(30) |
| 2012 |
Jan
(31) |
Feb
(16) |
Mar
(22) |
Apr
(25) |
May
(18) |
Jun
(13) |
Jul
(83) |
Aug
(49) |
Sep
(20) |
Oct
(60) |
Nov
(35) |
Dec
(28) |
| 2013 |
Jan
(39) |
Feb
(61) |
Mar
(35) |
Apr
(21) |
May
(45) |
Jun
(56) |
Jul
(20) |
Aug
(9) |
Sep
(10) |
Oct
(31) |
Nov
(8) |
Dec
(4) |
| 2014 |
Jan
(6) |
Feb
(7) |
Mar
(7) |
Apr
(6) |
May
(4) |
Jun
(8) |
Jul
(5) |
Aug
(2) |
Sep
(4) |
Oct
(4) |
Nov
(11) |
Dec
(5) |
| 2015 |
Jan
(4) |
Feb
(4) |
Mar
(3) |
Apr
(4) |
May
(9) |
Jun
(4) |
Jul
(15) |
Aug
(8) |
Sep
(16) |
Oct
(18) |
Nov
(15) |
Dec
(7) |
| 2016 |
Jan
(20) |
Feb
(9) |
Mar
(15) |
Apr
(24) |
May
(16) |
Jun
(28) |
Jul
(22) |
Aug
(23) |
Sep
(18) |
Oct
(30) |
Nov
(40) |
Dec
(9) |
| 2017 |
Jan
(1) |
Feb
(8) |
Mar
(37) |
Apr
(26) |
May
(25) |
Jun
(46) |
Jul
(24) |
Aug
(9) |
Sep
|
Oct
|
Nov
|
Dec
|
|
From: Hagen P. P. <ha...@ja...> - 2011-12-18 01:07:49
|
> Sometimes network packets are dropped for some reason. In enterprise > systems which require strict RAS functionality, we must know the > reason why it happened and explain it to our customers even if using > TCP. When we investigate the incidents, at first we try to find out > whether the problem is in the server(kernel, application) or else > (router, hub etc). And next we try to find out which layer > (application/middleware/kernel(IP/TCP/UDP/..)etc.) the problem > occurs. For the first question tcpdump may the right tool. For the later systemtap can be used. I mean we now have the possibility to instrument the kernel at runtime, without bloating the source. Anyway: is 63e03724b51 not suitable to gather the required information easily? Hagen |
|
From: Stephen H. <ste...@vy...> - 2011-12-17 00:17:24
|
> Sometimes network packets are dropped for some reason. In enterprise > systems which require strict RAS functionality, we must know the > reason why it happened and explain it to our customers even if using > TCP. When we investigate the incidents, at first we try to find out > whether the problem is in the server(kernel, application) or else > (router, hub etc). And next we try to find out which layer > (application/middleware/kernel(IP/TCP/UDP/..)etc.) the problem > occurs. I feel sorry for you, your users don't understand TCP. TCP intentionally induces loss to measure capacity. This is one of the fundamental principles of loss based congestion control. |
|
From: Satoru M. <sat...@hd...> - 2011-12-16 22:17:38
|
This patch adds a tracepoint to tcp_retransmit_skb() to get the
sk, skb and return value. This helps one understand whether problems
are in a server or not.
Signed-off-by: Satoru Moriya <sat...@hd...>
---
include/trace/events/tcp.h | 35 +++++++++++++++++++++++++++++++++++
net/core/net-traces.c | 1 +
net/ipv4/tcp_output.c | 3 +++
3 files changed, 39 insertions(+), 0 deletions(-)
create mode 100644 include/trace/events/tcp.h
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
new file mode 100644
index 0000000..821cdb7
--- /dev/null
+++ b/include/trace/events/tcp.h
@@ -0,0 +1,35 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tcp
+
+#if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TCP_H
+
+#include <linux/skbuff.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(tcp_retransmit_skb,
+
+ TP_PROTO(struct sock *sk, struct sk_buff *skb, int err),
+
+ TP_ARGS(sk, skb, err),
+
+ TP_STRUCT__entry(
+ __field(void *, skaddr)
+ __field(void *, skbaddr)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->skaddr = sk;
+ __entry->skbaddr = skb;
+ __entry->err = err;
+ ),
+
+ TP_printk("sk=%p skb=%p err=%d",
+ __entry->skaddr, __entry->skbaddr, __entry->err)
+);
+
+#endif /* _TRACE_TCP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index ba3c012..63f966b 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -31,6 +31,7 @@
#include <trace/events/napi.h>
#include <trace/events/sock.h>
#include <trace/events/udp.h>
+#include <trace/events/tcp.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 13d3a79..a6db789 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -40,6 +40,8 @@
#include <linux/gfp.h>
#include <linux/module.h>
+#include <trace/events/tcp.h>
+
/* People can turn this off for buggy TCP's found in printers etc. */
int sysctl_tcp_retrans_collapse __read_mostly = 1;
@@ -2188,6 +2190,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
}
out:
+ trace_tcp_retransmit_skb(sk, skb, err);
return err;
}
--
1.7.6.4
|
|
From: Satoru M. <sat...@hd...> - 2011-12-16 22:16:39
|
This is just a cleanup patch to make it easy to hook the return value
with a tracepoint.
Signed-off-by: Satoru Moriya <sat...@hd...>
---
net/ipv4/tcp_output.c | 31 +++++++++++++++++++++----------
1 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 63170e2..13d3a79 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2089,18 +2089,24 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
* copying overhead: fragmentation, tunneling, mangling etc.
*/
if (atomic_read(&sk->sk_wmem_alloc) >
- min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
- return -EAGAIN;
+ min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) {
+ err = -EAGAIN;
+ goto out;
+ }
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
BUG();
- if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
- return -ENOMEM;
+ if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) {
+ err = -ENOMEM;
+ goto out;
+ }
}
- if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
- return -EHOSTUNREACH; /* Routing failure or similar. */
+ if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) {
+ err = -EHOSTUNREACH; /* Routing failure or similar. */
+ goto out;
+ }
cur_mss = tcp_current_mss(sk);
@@ -2110,12 +2116,16 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
* our retransmit serves as a zero window probe.
*/
if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
- TCP_SKB_CB(skb)->seq != tp->snd_una)
- return -EAGAIN;
+ TCP_SKB_CB(skb)->seq != tp->snd_una) {
+ err = -EAGAIN;
+ goto out;
+ }
if (skb->len > cur_mss) {
- if (tcp_fragment(sk, skb, cur_mss, cur_mss))
- return -ENOMEM; /* We'll try again later. */
+ if (tcp_fragment(sk, skb, cur_mss, cur_mss)) {
+ err = -ENOMEM; /* We'll try again later. */
+ goto out;
+ }
} else {
int oldpcount = tcp_skb_pcount(skb);
@@ -2177,6 +2187,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
*/
TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
}
+out:
return err;
}
--
1.7.6.4
|
|
From: Satoru M. <sat...@hd...> - 2011-12-16 22:15:31
|
Sometimes network packets are dropped for some reason. In enterprise systems which require strict RAS functionality, we must know the reason why it happened and explain it to our customers even if using TCP. When we investigate the incidents, at first we try to find out whether the problem is in the server(kernel, application) or else (router, hub etc). And next we try to find out which layer (application/middleware/kernel(IP/TCP/UDP/..)etc.) the problem occurs. In application layer, user applications and/or middlewares usually save logs if they dropped packets. In kernel layer, with this tracepoint, we are able to know whether the kernel(TCP layer) send packets successfully or not. With a combination of them, we can find out whether the problem is in the server or not. Satoru Moriya (2): tcp: refactor tcp_retransmit_skb() for a single return point tcp: add tracepoint for tcp retransmission include/trace/events/tcp.h | 35 +++++++++++++++++++++++++++++++++++ net/core/net-traces.c | 1 + net/ipv4/tcp_output.c | 34 ++++++++++++++++++++++++---------- 3 files changed, 60 insertions(+), 10 deletions(-) create mode 100644 include/trace/events/tcp.h -- 1.7.6.4 |
|
From: Seiji A. <sei...@hd...> - 2011-12-15 16:03:31
|
Hi, > >No. I have different understanding. > >Because error log address range resides in NVRAM, the contents will be >reserved even after reboot. So we do not need read/clear operations at >all, and should place all records in error log address range. I have a quick question about your comment above. Do you know whether this optimization of read/clear operations works on machines on which APEI is enabled by the WHEA _OSC call? Seiji |
|
From: Seiji A. <sei...@hd...> - 2011-12-12 22:33:23
|
>> This patch adds trace_jbd2_drop_transaction and trace_jbd2_update_superblock_end >> because there are similar tracepoints in jbd and they are needed in jbd2 as well. >> >> Seiji > >Looks good, thanks. > >Reviewed-by: Lukas Czerner <lcz...@re...> > >> >> >> Signed-off-by: Seiji Aguchi <sei...@hd...> Are there anyone else who can review my patch? Seiji |
|
From: Luck, T. <ton...@in...> - 2011-12-12 18:49:01
|
> if (reason == KMSG_DUMP_PANIC) {
> if(is_spin_locked(&psinfo->buf_lock))
> pr_err("lock is taken.\n");
> else {
> spin_lock_irqsave(&psinfo->buf_lock, flags);
> }
>
> However, this won't work for this reason.
> - printk() must not be called in serialized path because deadlock of logbuf_lock may cause.
There is also the issue that kmsg has already computed the addresses
of useful pieces in __log_buf at this point - so any printk() we
add here is not going to be saved into pstore. So a user relying
on pstore to see what happened, won't see any messages we add here.
-Tony
|
|
From: Seiji A. <sei...@hd...> - 2011-12-12 18:33:04
|
Hi,
>> - if (in_nmi()) {
>> - is_locked = spin_trylock(&psinfo->buf_lock);
>> - if (!is_locked)
>> - pr_err("pstore dump routine blocked in NMI, may corrupt error record\n");
>> - } else
>> + /*
>> + * pstore_dump() is serialized in panic path.
>> + * So, we don't need to take any locks.
>> + */
>> + if (reason != KMSG_DUMP_PANIC)
>> spin_lock_irqsave(&psinfo->buf_lock, flags);
>
>This probably won't work because the original check wasn't necessarily for
>locking purposes but to see if a lock was already taken in the serialized
>path. If buf_lock is already held, the code may have trouble executing
>the backend, hence the pr_err.
If a backend driver lock, such as erst_lock/efivars_lock, is held in the serialized path,
the backend drivers may have some trouble.
On the other hand, pstore will not have any trouble with my code because psinfo->buf_lock is protecting
the memory, psinfo->buf, which backend drivers use to save data, and the content of psinfo->buf is
simply overwritten in pstore_dump().
So, pstore can just blindly continue even if psinfo->buf_lock is held in the serialized path.
If there are some troubles in backend drivers, they should be fixed.
I guess you are suggesting that I add some code to check the lock status so that users/developers know
the reason why pstore failed.
So, I have a comment about that.
If you would like to just check if a lock was already in the serialized path,
"is_spin_locked()" macro is better.
In addition to that, if you would like to let users know the locking status, pr_err() is needed.
So, the code will be as follows.
if (reason == KMSG_DUMP_PANIC) {
if(is_spin_locked(&psinfo->buf_lock))
pr_err("lock is taken.\n");
else {
spin_lock_irqsave(&psinfo->buf_lock, flags);
}
However, this won't work for the following reason.
- printk() must not be called in the serialized path because it may cause a deadlock on logbuf_lock.
Of course, we can avoid the deadlock if spin_lock_init(logbuf_lock) is called. But spin_lock_init()
was not accepted in the previous discussion.
>> + WARN_ON(in_nmi() && reason != KMSG_DUMP_PANIC);
>> +
>
>You will need a comment to explain why the above is there.
I agree. The comment is needed.
Seiji
|
|
From: Don Z. <dz...@re...> - 2011-12-12 16:01:26
|
On Fri, Dec 02, 2011 at 05:09:32PM -0500, Seiji Aguchi wrote: > This patch just moves kmsg_dump(KMSG_DUMP_PANIC) below smp_send_stop > for serializing logging process via smp_send_stop. > > Signed-off-by: Seiji Aguchi <sei...@hd...> This is a useful first step towards serializing the kmsg_dump framework. It takes a similar approach to kdump and makes writing code here easier. Acked-by: Don Zickus <dz...@re...> > > --- > kernel/panic.c | 4 ++-- > 1 files changed, 2 insertions(+), 2 deletions(-) > > diff --git a/kernel/panic.c b/kernel/panic.c > index b265936..c8e0ae8 100644 > --- a/kernel/panic.c > +++ b/kernel/panic.c > @@ -88,8 +88,6 @@ NORET_TYPE void panic(const char * fmt, ...) > */ > crash_kexec(NULL); > > - kmsg_dump(KMSG_DUMP_PANIC); > - > /* > * Note smp_send_stop is the usual smp shutdown function, which > * unfortunately means it may not be hardened to work in a panic > @@ -97,6 +95,8 @@ NORET_TYPE void panic(const char * fmt, ...) > */ > smp_send_stop(); > > + kmsg_dump(KMSG_DUMP_PANIC); > + > atomic_notifier_call_chain(&panic_notifier_list, 0, buf); > > bust_spinlocks(0); > -- 1.7.1 |
|
From: Don Z. <dz...@re...> - 2011-12-12 15:58:59
|
On Fri, Dec 02, 2011 at 05:10:26PM -0500, Seiji Aguchi wrote:
> Patch Description:
> - Skip spin_locks in panic case in both kmsg_dump() and pstore_dump() because they are
> serialized via smp_send_stop
>
> - Add WARN_ON() in "in_nmi() and !panic" case into kmsg_dump(). Currently, this case never
> happens because only kmsg_dump(KMSG_DUMP_PANIC) is called in NMI case.
> But if someone adds new kmsg_dump() into NMI path in the future, kmsg_dump() may deadlock.
> We can trap it and complain with this WARN_ON().
>
>
> With this patch, kmsg_dump()/pstore_dump() work as follows.
> panic case (KMSG_DUMP_PANIC):
> - don't take lock because they are serialized.
>
> not panic case (KMSG_DUMP_OOPS/KMSG_DUMP_EMERG/KMSG_DUMP_RESTART/KMSG_DUMP_HALT):
> - take locks normally
>
> Regarding as NMI case,
> - kmsg_dump()/pstore_dump() don't take locks, so deadlock issue will not happen
> because kmsg_dump() is called in just panic case with current implementation.
> - If someone adds new kmsg_dump() into NMI path, WARN_ON() is called.
> So we can trap it and ask to fix it.
>
> Signed-off-by: Seiji Aguchi <sei...@hd...>
>
> ---
> fs/pstore/platform.c | 16 ++++++----------
> kernel/printk.c | 13 +++++++++++--
> 2 files changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
> index 57bbf90..823669e 100644
> --- a/fs/pstore/platform.c
> +++ b/fs/pstore/platform.c
> @@ -90,18 +90,17 @@ static void pstore_dump(struct kmsg_dumper *dumper,
> int hsize, ret;
> unsigned int part = 1;
> unsigned long flags = 0;
> - int is_locked = 0;
>
> if (reason < ARRAY_SIZE(reason_str))
> why = reason_str[reason];
> else
> why = "Unknown";
>
> - if (in_nmi()) {
> - is_locked = spin_trylock(&psinfo->buf_lock);
> - if (!is_locked)
> - pr_err("pstore dump routine blocked in NMI, may corrupt error record\n");
> - } else
> + /*
> + * pstore_dump() is serialized in panic path.
> + * So, we don't need to take any locks.
> + */
> + if (reason != KMSG_DUMP_PANIC)
> spin_lock_irqsave(&psinfo->buf_lock, flags);
This probably won't work because the original check wasn't necessarily for
locking purposes but to see if a lock was already taken in the serialized
path. If buf_lock is already held, the code may have trouble executing
the backend, hence the pr_err.
> oopscount++;
> while (total < kmsg_bytes) {
> @@ -131,10 +130,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
> total += l1_cpy + l2_cpy;
> part++;
> }
> - if (in_nmi()) {
> - if (is_locked)
> - spin_unlock(&psinfo->buf_lock);
> - } else
> + if (reason != KMSG_DUMP_PANIC)
> spin_unlock_irqrestore(&psinfo->buf_lock, flags);
> }
>
> diff --git a/kernel/printk.c b/kernel/printk.c
> index 1455a0d..bc5ac61 100644
> --- a/kernel/printk.c
> +++ b/kernel/printk.c
> @@ -1732,13 +1732,22 @@ void kmsg_dump(enum kmsg_dump_reason reason)
> unsigned long l1, l2;
> unsigned long flags;
>
> + WARN_ON(in_nmi() && reason != KMSG_DUMP_PANIC);
> +
You will need a comment to explain why the above is there.
> /* Theoretically, the log could move on after we do this, but
> there's not a lot we can do about that. The new messages
> will overwrite the start of what we dump. */
> - raw_spin_lock_irqsave(&logbuf_lock, flags);
> +
> + /*
> + * kmsg_dump(KMSG_DUMP_PANIC) is serialized.
> + * So, we don't need to take any locks.
> + */
> + if (reason != KMSG_DUMP_PANIC)
> + raw_spin_lock_irqsave(&logbuf_lock, flags);
> end = log_end & LOG_BUF_MASK;
> chars = logged_chars;
> - raw_spin_unlock_irqrestore(&logbuf_lock, flags);
> + if (reason != KMSG_DUMP_PANIC)
> + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
>
> if (chars > end) {
> s1 = log_buf + log_buf_len - chars + end;
Cheers,
Don
|
|
From: 天怀 <zha...@so...> - 2011-12-04 14:25:14
|
样,厕所偷看法也退出了历史舞台。一来二去,偷偷看书的方法就只 助你快速通过专本科、研究生学历(学位)和英语、计算机、 样,厕所偷看法也退出了历史舞台。一来二去,偷偷看书的方法就只 会计、建造师、资格讠正书. 八糟的小说,上千本总是有读过的。这些小说的共同点在于,人物都 可提供国外学历认讠正!联系王老师QQ: 6543677135176074981 八糟的小说,上千本总是有读过的。这些小说的共同点在于,人物都 |
|
From: Seiji A. <sei...@hd...> - 2011-12-02 22:14:13
|
This patch skips subsequent kmsg_dump() function calls in the panic path. With this patch, we can avoid deadlock due to the subsequent calls. Actually, kmsg_dump(KMSG_DUMP_EMERG) is called after kmsg_dump(KMSG_DUMP_PANIC) when panic_timeout variable is set. Signed-off-by: Seiji Aguchi <sei...@hd...> --- kernel/printk.c | 10 ++++++++++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/kernel/printk.c b/kernel/printk.c index bc5ac61..25d6dc1 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1731,6 +1731,16 @@ void kmsg_dump(enum kmsg_dump_reason reason) const char *s1, *s2; unsigned long l1, l2; unsigned long flags; + static bool panicked; + + /* + * kmsg_dump() is skipped because we already got panic log. + */ + if (panicked) + return; + + if (reason == KMSG_DUMP_PANIC) + panicked = true; WARN_ON(in_nmi() && reason != KMSG_DUMP_PANIC); -- 1.7.1 |
|
From: Seiji A. <sei...@hd...> - 2011-12-02 22:10:51
|
Patch Description:
- Skip spin_locks in panic case in both kmsg_dump() and pstore_dump() because they are
serialized via smp_send_stop
- Add WARN_ON() in "in_nmi() and !panic" case into kmsg_dump(). Currently, this case never
happens because only kmsg_dump(KMSG_DUMP_PANIC) is called in NMI case.
But if someone adds new kmsg_dump() into NMI path in the future, kmsg_dump() may deadlock.
We can trap it and complain with this WARN_ON().
With this patch, kmsg_dump()/pstore_dump() work as follows.
panic case (KMSG_DUMP_PANIC):
- don't take lock because they are serialized.
not panic case (KMSG_DUMP_OOPS/KMSG_DUMP_EMERG/KMSG_DUMP_RESTART/KMSG_DUMP_HALT):
- take locks normally
Regarding the NMI case,
- kmsg_dump()/pstore_dump() don't take locks, so deadlock issue will not happen
because kmsg_dump() is called in just panic case with current implementation.
- If someone adds new kmsg_dump() into NMI path, WARN_ON() is called.
So we can trap it and ask to fix it.
Signed-off-by: Seiji Aguchi <sei...@hd...>
---
fs/pstore/platform.c | 16 ++++++----------
kernel/printk.c | 13 +++++++++++--
2 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 57bbf90..823669e 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -90,18 +90,17 @@ static void pstore_dump(struct kmsg_dumper *dumper,
int hsize, ret;
unsigned int part = 1;
unsigned long flags = 0;
- int is_locked = 0;
if (reason < ARRAY_SIZE(reason_str))
why = reason_str[reason];
else
why = "Unknown";
- if (in_nmi()) {
- is_locked = spin_trylock(&psinfo->buf_lock);
- if (!is_locked)
- pr_err("pstore dump routine blocked in NMI, may corrupt error record\n");
- } else
+ /*
+ * pstore_dump() is serialized in panic path.
+ * So, we don't need to take any locks.
+ */
+ if (reason != KMSG_DUMP_PANIC)
spin_lock_irqsave(&psinfo->buf_lock, flags);
oopscount++;
while (total < kmsg_bytes) {
@@ -131,10 +130,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
total += l1_cpy + l2_cpy;
part++;
}
- if (in_nmi()) {
- if (is_locked)
- spin_unlock(&psinfo->buf_lock);
- } else
+ if (reason != KMSG_DUMP_PANIC)
spin_unlock_irqrestore(&psinfo->buf_lock, flags);
}
diff --git a/kernel/printk.c b/kernel/printk.c
index 1455a0d..bc5ac61 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1732,13 +1732,22 @@ void kmsg_dump(enum kmsg_dump_reason reason)
unsigned long l1, l2;
unsigned long flags;
+ WARN_ON(in_nmi() && reason != KMSG_DUMP_PANIC);
+
/* Theoretically, the log could move on after we do this, but
there's not a lot we can do about that. The new messages
will overwrite the start of what we dump. */
- raw_spin_lock_irqsave(&logbuf_lock, flags);
+
+ /*
+ * kmsg_dump(KMSG_DUMP_PANIC) is serialized.
+ * So, we don't need to take any locks.
+ */
+ if (reason != KMSG_DUMP_PANIC)
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
end = log_end & LOG_BUF_MASK;
chars = logged_chars;
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ if (reason != KMSG_DUMP_PANIC)
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
if (chars > end) {
s1 = log_buf + log_buf_len - chars + end;
--
1.7.1
|
|
From: Seiji A. <sei...@hd...> - 2011-12-02 22:10:03
|
This patch just moves kmsg_dump(KMSG_DUMP_PANIC) below smp_send_stop for serializing logging process via smp_send_stop. Signed-off-by: Seiji Aguchi <sei...@hd...> --- kernel/panic.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index b265936..c8e0ae8 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -88,8 +88,6 @@ NORET_TYPE void panic(const char * fmt, ...) */ crash_kexec(NULL); - kmsg_dump(KMSG_DUMP_PANIC); - /* * Note smp_send_stop is the usual smp shutdown function, which * unfortunately means it may not be hardened to work in a panic @@ -97,6 +95,8 @@ NORET_TYPE void panic(const char * fmt, ...) */ smp_send_stop(); + kmsg_dump(KMSG_DUMP_PANIC); + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); bust_spinlocks(0); -- 1.7.1 |
|
From: Seiji A. <sei...@hd...> - 2011-12-02 22:09:11
|
Hi, Discussion: As Don mentioned in following thread, it would be nice for pstore/kmsg_dump to serialize panic path because they can log messages reliably. https://lkml.org/lkml/2011/10/13/427 This patchset is based on his proposal switching smp_send_stop() from REBOOT_VECTOR to NMI. Change Log: v2 -> v3 - Skip spin_locks in panic case in both kmsg_dump() and pstore_dump() instead of calling spin_lock_init() to avoid potential issues due to spin_lock_init() - Add WARN_ON() in "in_nmi() and !panic" case into kmsg_dump() so that we trap when someone adds new kmsg_dump() in NMI path in the future - Skip subsequent kmsg_dump() function calls to avoid deadlock. v1 -> v2 - Add trylocks to kmsg_dump()/pstore_dump() so that they can work in NMI context. - Divide a patch into two First one is just moving kmsg_dump(KMSG_DUMP_PANIC) below smp_send_stop() Second one is changing lock operations in kmsg_dump()/pstore_dump() v1 - Move kmsg_dump(KMSG_DUMP_PANIC) below smp_send_stop - Bust logbuf_lock of kmsg_dump() in panic path for avoiding deadlock - Bust psinfo->buf_lock of pstore_dump() in panic path for avoiding deadlock Patch Description: [RFC][PATCH v3 1/3] Move kmsg_dump(KMSG_DUMP_PANIC) below smp_send_stop() - Just move kmsg_dump(KMSG_DUMP_PANIC) below smp_send_stop() [RFC][PATCH v3 2/3] Skip spin_locks in panic case and add WARN_ON() - Skip spin_locks in panic case in both kmsg_dump() and pstore_dump() - Add WARN_ON() in "in_nmi() and !panic" case into kmsg_dump() [RFC][PATCH v3 3/3] Skip subsequent kmsg_dump() - Skip subsequent kmsg_dump() function calls TODO: This patchset focuses on only kmsg_dump()/pstore_dump(). So, we have to check whether backend drivers work. Any comments are welcome. 
Seiji Aguchi (3): Move kmsg_dump(KMSG_DUMP_PANIC) below smp_send_stop() Skip spin_locks in panic case and add WARN_ON() Skip subsequent kmsg_dump() fs/pstore/platform.c | 16 ++++++---------- kernel/panic.c | 4 ++-- kernel/printk.c | 23 +++++++++++++++++++++-- 3 files changed, 29 insertions(+), 14 deletions(-) |
|
From: Lukas C. <lcz...@re...> - 2011-11-30 11:29:16
|
On Tue, 29 Nov 2011, Seiji Aguchi wrote:
> Hi
>
> This patch adds trace_jbd2_drop_transaction and trace_jbd2_update_superblock_end
> because there are similar tracepoints in jbd and they are needed in jbd2 as well.
>
> Seiji
Looks good, thanks.
Reviewed-by: Lukas Czerner <lcz...@re...>
>
>
> Signed-off-by: Seiji Aguchi <sei...@hd...>
>
> ---
> fs/jbd2/checkpoint.c | 2 ++
> fs/jbd2/journal.c | 2 ++
> include/trace/events/jbd2.h | 28 ++++++++++++++++++++++++++++
> 3 files changed, 32 insertions(+), 0 deletions(-)
>
> diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
> index 16a698b..2bfd8b0 100644
> --- a/fs/jbd2/checkpoint.c
> +++ b/fs/jbd2/checkpoint.c
> @@ -797,5 +797,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
> J_ASSERT(journal->j_committing_transaction != transaction);
> J_ASSERT(journal->j_running_transaction != transaction);
>
> + trace_jbd2_drop_transaction(journal, transaction);
> +
> jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
> }
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index f24df13..5953b3d 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -1185,6 +1185,8 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
> } else
> write_dirty_buffer(bh, WRITE);
>
> + trace_jbd2_update_superblock_end(journal, wait);
> +
> out:
> /* If we have just flushed the log (by marking s_start==0), then
> * any future commit will have to be careful to update the
> diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
> index 7596441..ae59bc2 100644
> --- a/include/trace/events/jbd2.h
> +++ b/include/trace/events/jbd2.h
> @@ -81,6 +81,13 @@ DEFINE_EVENT(jbd2_commit, jbd2_commit_logging,
> TP_ARGS(journal, commit_transaction)
> );
>
> +DEFINE_EVENT(jbd2_commit, jbd2_drop_transaction,
> +
> + TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
> +
> + TP_ARGS(journal, commit_transaction)
> +);
> +
> TRACE_EVENT(jbd2_end_commit,
> TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
>
> @@ -229,6 +236,27 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
> __entry->block_nr, __entry->freed)
> );
>
> +TRACE_EVENT(jbd2_update_superblock_end,
> +
> + TP_PROTO(journal_t *journal, int wait),
> +
> + TP_ARGS(journal, wait),
> +
> + TP_STRUCT__entry(
> + __field( dev_t, dev )
> + __field( int, wait )
> + ),
> +
> + TP_fast_assign(
> + __entry->dev = journal->j_fs_dev->bd_dev;
> + __entry->wait = wait;
> + ),
> +
> + TP_printk("dev %d,%d wait %d",
> + MAJOR(__entry->dev), MINOR(__entry->dev),
> + __entry->wait)
> +);
> +
> #endif /* _TRACE_JBD2_H */
>
> /* This part must be outside protection */
>
--
|
|
From: Satoru M. <sat...@hd...> - 2011-11-29 22:35:18
|
On 11/29/2011 04:23 AM, Peter Zijlstra wrote: > On Mon, 2011-11-28 at 17:44 -0500, Satoru Moriya wrote: >> In the latency sensitive systems, we usually focus on the worst latency. >> And so, it is useful to save max delays into the per-task delay >> accounting functionality. >> > > No!!! > > Linus told us to be bastards, so there you have it. > > There's way too many different accounting crap thingies around. And > now I get a patch without any justification what so ever. So no, piss off. I agree that there are other ways to get max latency. I think that with delayacct we can easily get delay statistics which each task or task-group encountered in their life time because delayacct records "per-task" delay. But I may just not know of better tools/functions. Which tool/function do you think is the best one (to extend)? ftrace? perf? Regards, Satoru |
|
From: Seiji A. <sei...@hd...> - 2011-11-29 18:16:18
|
Hi
This patch adds trace_jbd2_drop_transaction and trace_jbd2_update_superblock_end
because there are similar tracepoints in jbd and they are needed in jbd2 as well.
Seiji
Signed-off-by: Seiji Aguchi <sei...@hd...>
---
fs/jbd2/checkpoint.c | 2 ++
fs/jbd2/journal.c | 2 ++
include/trace/events/jbd2.h | 28 ++++++++++++++++++++++++++++
3 files changed, 32 insertions(+), 0 deletions(-)
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 16a698b..2bfd8b0 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -797,5 +797,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
+ trace_jbd2_drop_transaction(journal, transaction);
+
jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f24df13..5953b3d 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1185,6 +1185,8 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
} else
write_dirty_buffer(bh, WRITE);
+ trace_jbd2_update_superblock_end(journal, wait);
+
out:
/* If we have just flushed the log (by marking s_start==0), then
* any future commit will have to be careful to update the
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index 7596441..ae59bc2 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -81,6 +81,13 @@ DEFINE_EVENT(jbd2_commit, jbd2_commit_logging,
TP_ARGS(journal, commit_transaction)
);
+DEFINE_EVENT(jbd2_commit, jbd2_drop_transaction,
+
+ TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+ TP_ARGS(journal, commit_transaction)
+);
+
TRACE_EVENT(jbd2_end_commit,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
@@ -229,6 +236,27 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
__entry->block_nr, __entry->freed)
);
+TRACE_EVENT(jbd2_update_superblock_end,
+
+ TP_PROTO(journal_t *journal, int wait),
+
+ TP_ARGS(journal, wait),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, wait )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = journal->j_fs_dev->bd_dev;
+ __entry->wait = wait;
+ ),
+
+ TP_printk("dev %d,%d wait %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->wait)
+);
+
#endif /* _TRACE_JBD2_H */
/* This part must be outside protection */
--
1.7.1
|
|
From: Peter Z. <pe...@in...> - 2011-11-29 10:24:16
|
On Mon, 2011-11-28 at 17:44 -0500, Satoru Moriya wrote: > In the latency sensitive systems, we usually focus on the worst latency. > And so, it is useful to save max delays into the per-task delay > accounting functionality. > > Example output: > (on 100 concurrent dd reading sparse files w/ 1 dd writing a file) > > CPU count real total virtual total delay total delay_max delay average > 222 45993008 74603882 7637295 3691858 0.034ms > IO count delay total delay max delay average > 2 21812073 12728672 10ms > SWAP count delay total delay max delay average > 0 0 0 0ms > RECLAIM count delay total delay_max delay average > 2 348488 211985 0ms > > Any comments are welcome. No!!! Linus told us to be bastards, so there you have it. There's way too many different accounting crap thingies around. And now I get a patch without any justification what so ever. So no, piss off. |
|
From: Satoru M. <sat...@hd...> - 2011-11-28 22:47:17
|
With this patch, getdelays shows maximum delay which it gets from kernel
via taskstats interface.
Output example:
(on 100 concurrent dd reading sparse files with 1 dd writing a file)
CPU count real total virtual total delay total delay_max delay average
234 61990576 78703354 543649 138417 0.002ms
IO count delay total delay max delay average
16 184774999 61259321 11ms
SWAP count delay total delay max delay average
0 0 0 0ms
RECLAIM count delay total delay_max delay average
20 4249456 381288 0ms
Signed-off-by: Satoru Moriya <sat...@hd...>
---
Documentation/accounting/getdelays.c | 28 ++++++++++++++++------------
1 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index f6318f6..69d3ec4 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -197,32 +197,36 @@ static int get_family_id(int sd)
static void print_delayacct(struct taskstats *t)
{
- printf("\n\nCPU %15s%15s%15s%15s%15s\n"
- " %15llu%15llu%15llu%15llu%15.3fms\n"
- "IO %15s%15s%15s\n"
- " %15llu%15llu%15llums\n"
- "SWAP %15s%15s%15s\n"
- " %15llu%15llu%15llums\n"
- "RECLAIM %12s%15s%15s\n"
- " %15llu%15llu%15llums\n",
+ printf("\n\nCPU %15s%15s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15llu%15llu%15llu%15.3fms\n"
+ "IO %15s%15s%15s%15s\n"
+ " %15llu%15llu%15llu%15llums\n"
+ "SWAP %15s%15s%15s%15s\n"
+ " %15llu%15llu%15llu%15llums\n"
+ "RECLAIM %12s%15s%15s%15s\n"
+ " %15llu%15llu%15llu%15llums\n",
"count", "real total", "virtual total",
- "delay total", "delay average",
+ "delay total", "delay_max", "delay average",
(unsigned long long)t->cpu_count,
(unsigned long long)t->cpu_run_real_total,
(unsigned long long)t->cpu_run_virtual_total,
(unsigned long long)t->cpu_delay_total,
+ (unsigned long long)t->cpu_delay_max,
average_ms((double)t->cpu_delay_total, t->cpu_count),
- "count", "delay total", "delay average",
+ "count", "delay total", "delay max", "delay average",
(unsigned long long)t->blkio_count,
(unsigned long long)t->blkio_delay_total,
+ (unsigned long long)t->blkio_delay_max,
average_ms(t->blkio_delay_total, t->blkio_count),
- "count", "delay total", "delay average",
+ "count", "delay total", "delay max", "delay average",
(unsigned long long)t->swapin_count,
(unsigned long long)t->swapin_delay_total,
+ (unsigned long long)t->swapin_delay_max,
average_ms(t->swapin_delay_total, t->swapin_count),
- "count", "delay total", "delay average",
+ "count", "delay total", "delay_max", "delay average",
(unsigned long long)t->freepages_count,
(unsigned long long)t->freepages_delay_total,
+ (unsigned long long)t->freepages_delay_max,
average_ms(t->freepages_delay_total, t->freepages_count));
}
--
1.7.6.4
|
|
From: Satoru M. <sat...@hd...> - 2011-11-28 22:46:32
|
This patch adds members to struct taskstats to save maximum
CPU/IO/SWAP/RECLAIM delays and saves them in __delayacct_add_tsk().
Signed-off-by: Satoru Moriya <sat...@hd...>
---
Documentation/accounting/taskstats-struct.txt | 9 +++++++++
include/linux/taskstats.h | 8 +++++++-
kernel/delayacct.c | 12 +++++++++++-
3 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
index e7512c0..f7c0fa6 100644
--- a/Documentation/accounting/taskstats-struct.txt
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -28,6 +28,8 @@ There are three different groups of fields in the struct taskstats:
6) Extended delay accounting fields for memory reclaim
+7) Extended delay accounting fields for maximum delay
+
Future extension should add fields to the end of the taskstats struct, and
should not change the relative position of each field within the struct.
@@ -177,4 +179,11 @@ struct taskstats {
/* Delay waiting for memory reclaim */
__u64 freepages_count;
__u64 freepages_delay_total;
+
+7) Extended delay accounting fields for maximum delay
+ /* Max value for each delay */
+ __u64 cpu_delay_max;
+ __u64 blkio_delay_max;
+ __u64 swapin_delay_max;
+ __u64 freepages_delay_max;
}
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 2466e55..1a775fb 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -33,7 +33,7 @@
*/
-#define TASKSTATS_VERSION 8
+#define TASKSTATS_VERSION 9
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
@@ -163,6 +163,12 @@ struct taskstats {
/* Delay waiting for memory reclaim */
__u64 freepages_count;
__u64 freepages_delay_total;
+
+ /* Max value for each delay */
+ __u64 cpu_delay_max;
+ __u64 blkio_delay_max;
+ __u64 swapin_delay_max;
+ __u64 freepages_delay_max;
};
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 33d090b..ad0fdb5 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -107,7 +107,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
{
s64 tmp;
unsigned long t1;
- unsigned long long t2, t3;
+ unsigned long long t2, t3, t4;
unsigned long flags;
struct timespec ts;
@@ -135,6 +135,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
t1 = tsk->sched_info.pcount;
t2 = tsk->sched_info.run_delay;
t3 = tsk->se.sum_exec_runtime;
+ t4 = tsk->sched_info.max_delay;
d->cpu_count += t1;
@@ -145,6 +146,9 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
d->cpu_run_virtual_total =
(tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
+ if (d->cpu_delay_max < t4)
+ d->cpu_delay_max = t4;
+
/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
spin_lock_irqsave(&tsk->delays->lock, flags);
@@ -157,6 +161,12 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
d->freepages_count += tsk->delays->freepages_count;
+ if (d->blkio_delay_max < tsk->delays->blkio_delay_max)
+ d->blkio_delay_max = tsk->delays->blkio_delay_max;
+ if (d->swapin_delay_max < tsk->delays->swapin_delay_max)
+ d->swapin_delay_max = tsk->delays->swapin_delay_max;
+ if (d->freepages_delay_max < tsk->delays->freepages_delay_max)
+ d->freepages_delay_max = tsk->delays->freepages_delay_max;
spin_unlock_irqrestore(&tsk->delays->lock, flags);
done:
--
1.7.6.4
|
|
From: Satoru M. <sat...@hd...> - 2011-11-28 22:46:05
|
This patch adds members to struct task_delay_info and a parameter to
delayacct_end() and saves maximum values for IO/SWAP/RECLAIM delays.
Signed-off-by: Satoru Moriya <sat...@hd...>
---
include/linux/sched.h | 3 +++
kernel/delayacct.c | 13 +++++++++----
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1665e2c..edd8ad2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -768,6 +768,8 @@ struct task_delay_info {
struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */
u64 blkio_delay; /* wait for sync block io completion */
u64 swapin_delay; /* wait for swapin block io completion */
+ u64 blkio_delay_max; /* max wait for sync block io completion */
+ u64 swapin_delay_max; /* max wait for swapin block io completion */
u32 blkio_count; /* total count of the number of sync block */
/* io operations performed */
u32 swapin_count; /* total count of the number of swapin block */
@@ -775,6 +777,7 @@ struct task_delay_info {
struct timespec freepages_start, freepages_end;
u64 freepages_delay; /* wait for memory reclaim */
+ u64 freepages_delay_max;/* max wait for memory reclaim */
u32 freepages_count; /* total count of memory reclaim */
};
#endif /* CONFIG_TASK_DELAY_ACCT */
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 418b3f7..33d090b 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -61,7 +61,7 @@ static inline void delayacct_start(struct timespec *start)
*/
static void delayacct_end(struct timespec *start, struct timespec *end,
- u64 *total, u32 *count)
+ u64 *total, u32 *count, u64 *max)
{
struct timespec ts;
s64 ns;
@@ -76,6 +76,8 @@ static void delayacct_end(struct timespec *start, struct timespec *end,
spin_lock_irqsave(&current->delays->lock, flags);
*total += ns;
(*count)++;
+ if (*max < ns)
+ *max = ns;
spin_unlock_irqrestore(&current->delays->lock, flags);
}
@@ -91,12 +93,14 @@ void __delayacct_blkio_end(void)
delayacct_end(&current->delays->blkio_start,
&current->delays->blkio_end,
&current->delays->swapin_delay,
- &current->delays->swapin_count);
+ &current->delays->swapin_count,
+ &current->delays->swapin_delay_max);
else /* Other block I/O */
delayacct_end(&current->delays->blkio_start,
&current->delays->blkio_end,
&current->delays->blkio_delay,
- &current->delays->blkio_count);
+ &current->delays->blkio_count,
+ &current->delays->blkio_delay_max);
}
int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -181,6 +185,7 @@ void __delayacct_freepages_end(void)
delayacct_end(&current->delays->freepages_start,
&current->delays->freepages_end,
&current->delays->freepages_delay,
- &current->delays->freepages_count);
+ &current->delays->freepages_count,
+ &current->delays->freepages_delay_max);
}
--
1.7.6.4
|
|
From: Satoru M. <sat...@hd...> - 2011-11-28 22:45:13
|
This patch adds members to struct sched_info and saves max delays
into it.
Signed-off-by: Satoru Moriya <sat...@hd...>
---
include/linux/sched.h | 2 ++
kernel/sched_stats.h | 5 +++++
2 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c4f3e9..1665e2c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -736,6 +736,8 @@ struct sched_info {
/* cumulative counters */
unsigned long pcount; /* # of times run on this cpu */
unsigned long long run_delay; /* time spent waiting on a runqueue */
+ unsigned long long max_delay; /* max run_delay */
+ unsigned long long prev_cpu_delay; /* time spent waiting on prev cpus' runqueue */
/* timestamps */
unsigned long long last_arrival,/* when we last ran on a cpu */
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 87f9e36..48e9db9 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -171,6 +171,7 @@ static inline void sched_info_dequeued(struct task_struct *t)
delta = now - t->sched_info.last_queued;
sched_info_reset_dequeued(t);
t->sched_info.run_delay += delta;
+ t->sched_info.prev_cpu_delay += delta;
rq_sched_info_dequeued(task_rq(t), delta);
}
@@ -190,6 +191,10 @@ static void sched_info_arrive(struct task_struct *t)
t->sched_info.run_delay += delta;
t->sched_info.last_arrival = now;
t->sched_info.pcount++;
+ t->sched_info.prev_cpu_delay += delta;
+ if (t->sched_info.max_delay < t->sched_info.prev_cpu_delay)
+ t->sched_info.max_delay = t->sched_info.prev_cpu_delay;
+ t->sched_info.prev_cpu_delay = 0;
rq_sched_info_arrive(task_rq(t), delta);
}
--
1.7.6.4
|
|
From: Satoru M. <sat...@hd...> - 2011-11-28 22:44:51
|
In latency-sensitive systems, we usually focus on the worst-case latency,
so it is useful to record maximum delays in the per-task delay
accounting functionality.
Example output:
(on 100 concurrent dd reading sparse files w/ 1 dd writing a file)
CPU count real total virtual total delay total delay_max delay average
222 45993008 74603882 7637295 3691858 0.034ms
IO count delay total delay max delay average
2 21812073 12728672 10ms
SWAP count delay total delay max delay average
0 0 0 0ms
RECLAIM count delay total delay_max delay average
2 348488 211985 0ms
Any comments are welcome.
Satoru Moriya (4):
sched: add members to struct sched_info to save maximum
delayacct: add members to struct task_delay_info to save max delays
delayacct: update taskstats to save max delays
getdelays: show max CPU/IO/SWAP/RECLAIM delays
Documentation/accounting/getdelays.c | 28 ++++++++++++++----------
Documentation/accounting/taskstats-struct.txt | 9 ++++++++
include/linux/sched.h | 5 ++++
include/linux/taskstats.h | 8 ++++++-
kernel/delayacct.c | 25 +++++++++++++++++----
kernel/sched_stats.h | 5 ++++
6 files changed, 62 insertions(+), 18 deletions(-)
--
1.7.6.4
|