From: Balbir S. <ba...@in...> - 2006-05-02 06:22:28
|
Changelog Fixes suggested by Jay Lan - check for tidstats before taking the mutex_lock in taskstats_exit_send() - add back version information for struct taskstats Fixes comments by akpm (on earlier patch now incorporated here) - detailed comments on atomicity rules of accounting fields - replace use of nsec_t Other changes - fix coding style delayacct-taskstats.patch Usage of taskstats interface by delay accounting. Signed-off-by: Shailabh Nagar <na...@us...> Signed-off-by: Balbir Singh <ba...@in...> --- include/linux/delayacct.h | 13 +++++++++++ include/linux/taskstats.h | 53 ++++++++++++++++++++++++++++++++++++++++++++-- init/Kconfig | 1 kernel/delayacct.c | 42 ++++++++++++++++++++++++++++++++++++ kernel/taskstats.c | 13 ++++++++++- 5 files changed, 119 insertions(+), 3 deletions(-) diff -puN include/linux/delayacct.h~delayacct-taskstats include/linux/delayacct.h --- linux-2.6.17-rc3/include/linux/delayacct.h~delayacct-taskstats 2006-05-02 10:36:31.000000000 +0530 +++ linux-2.6.17-rc3-balbir/include/linux/delayacct.h 2006-05-02 10:39:30.000000000 +0530 @@ -18,6 +18,7 @@ #define _LINUX_DELAYACCT_H #include <linux/sched.h> +#include <linux/taskstats_kern.h> /* * Per-task flags relevant to delay accounting @@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); extern void __delayacct_blkio_end(void); +extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); static inline void delayacct_set_flag(int flag) { @@ -74,6 +76,14 @@ static inline void delayacct_blkio_end(v __delayacct_blkio_end(); } +static inline int delayacct_add_tsk(struct taskstats *d, + struct task_struct *tsk) +{ + if (!tsk->delays) + return -EINVAL; + return __delayacct_add_tsk(d, tsk); +} + #else static inline void delayacct_set_flag(int flag) {} @@ -89,6 +99,9 @@ static inline void delayacct_blkio_start {} static inline void delayacct_blkio_end(void) {} +static inline int delayacct_add_tsk(struct taskstats *d, + struct task_struct *tsk) +{ return 0; } #endif /* CONFIG_TASK_DELAY_ACCT */ #endif diff -puN include/linux/taskstats.h~delayacct-taskstats include/linux/taskstats.h --- linux-2.6.17-rc3/include/linux/taskstats.h~delayacct-taskstats 2006-05-02 10:36:31.000000000 +0530 +++ linux-2.6.17-rc3-balbir/include/linux/taskstats.h 2006-05-02 10:36:31.000000000 +0530 @@ -33,9 +33,58 @@ struct taskstats { - /* Version 1 */ + /* Delay accounting fields start + * + * All values, until comment "Delay accounting fields end" are + * available only if delay accounting is enabled, even though the last + * few fields are not delays + * + * xxx_count is the number of delay values recorded + * xxx_delay_total is the corresponding cumulative delay in nanoseconds + * + * xxx_delay_total wraps around to zero on overflow + * xxx_count incremented regardless of overflow + */ + + /* Delay waiting for cpu, while runnable + * count, delay_total NOT updated atomically + */ + __u64 cpu_count; + __u64 cpu_delay_total; + + /* Following four fields atomically updated using task->delays->lock */ + + /* Delay waiting for synchronous block I/O to complete + * does not account for delays in I/O submission + */ + __u64 blkio_count; + __u64 blkio_delay_total; + + /* Delay waiting for page fault I/O (swap in only) */ + __u64 swapin_count; + __u64 swapin_delay_total; + + /* cpu "wall-clock" running time + * On some architectures, value will adjust for cpu time stolen + * from the kernel in involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_real_total; + + /* cpu "virtual" running time + * Uses time intervals seen by the kernel i.e. no adjustment + * for kernel's involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_virtual_total; + /* Delay accounting fields end */ + /* version 1 ends here */ + + /* version of taskstats */ + __u64 version; - int filler_avoids_empty_struct_warnings; }; diff -puN init/Kconfig~delayacct-taskstats init/Kconfig --- linux-2.6.17-rc3/init/Kconfig~delayacct-taskstats 2006-05-02 10:36:31.000000000 +0530 +++ linux-2.6.17-rc3-balbir/init/Kconfig 2006-05-02 10:36:31.000000000 +0530 @@ -164,6 +164,7 @@ config TASKSTATS config TASK_DELAY_ACCT bool "Enable per-task delay accounting (EXPERIMENTAL)" + depends on TASKSTATS help Collect information on time spent by a task waiting for system resources like cpu, synchronous block I/O completion and swapping diff -puN kernel/delayacct.c~delayacct-taskstats kernel/delayacct.c --- linux-2.6.17-rc3/kernel/delayacct.c~delayacct-taskstats 2006-05-02 10:36:31.000000000 +0530 +++ linux-2.6.17-rc3-balbir/kernel/delayacct.c 2006-05-02 10:37:57.000000000 +0530 @@ -104,3 +104,45 @@ void __delayacct_blkio_end(void) ¤t->delays->blkio_delay, ¤t->delays->blkio_count); } + +int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) +{ + s64 tmp; + struct timespec ts; + unsigned long t1,t2,t3; + + tmp = (s64)d->cpu_run_real_total; + tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC; + d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; + + /* + * No locking available for sched_info (and too expensive to add one) + * Mitigate by taking snapshot of values + */ + t1 = tsk->sched_info.pcnt; + t2 = tsk->sched_info.run_delay; + t3 = tsk->sched_info.cpu_time; + + d->cpu_count += t1; + + jiffies_to_timespec(t2, &ts); + tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts); + d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; + + tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000; + d->cpu_run_virtual_total = + (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; + + /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ + + spin_lock(&tsk->delays->lock); + tmp = d->blkio_delay_total + tsk->delays->blkio_delay; + d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; + tmp = d->swapin_delay_total + tsk->delays->swapin_delay; + d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; + d->blkio_count += tsk->delays->blkio_count; + d->swapin_count += tsk->delays->swapin_count; + spin_unlock(&tsk->delays->lock); + + return 0; +} diff -puN kernel/taskstats.c~delayacct-taskstats kernel/taskstats.c --- linux-2.6.17-rc3/kernel/taskstats.c~delayacct-taskstats 2006-05-02 10:36:31.000000000 +0530 +++ linux-2.6.17-rc3-balbir/kernel/taskstats.c 2006-05-02 10:36:31.000000000 +0530 @@ -18,6 +18,7 @@ #include <linux/kernel.h> #include <linux/taskstats_kern.h> +#include <linux/delayacct.h> #include <net/genetlink.h> #include <asm/atomic.h> @@ -120,7 +121,9 @@ static int fill_pid(pid_t pid, struct ta * goto err; */ -err: + rc = delayacct_add_tsk(stats, tsk); + + /* Define err: label here if needed */ put_task_struct(tsk); return rc; @@ -152,6 +155,10 @@ static int fill_tgid(pid_t tgid, struct * break; */ + rc = delayacct_add_tsk(stats, tsk); + if (rc) + break; + } while_each_thread(first, tsk); read_unlock(&tasklist_lock); @@ -255,6 +262,8 @@ void taskstats_exit_send(struct task_str if (rc < 0) goto err_skb; + tidstats->version = TASKSTATS_VERSION; + na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID); NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid); NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, @@ -270,6 +279,8 @@ void taskstats_exit_send(struct task_str if (rc < 0) goto err_skb; + tgidstats->version = TASKSTATS_VERSION; + na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid); NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, _ |
From: Jay L. <jl...@en...> - 2006-05-04 02:13:57
|
Balbir Singh wrote: >Changelog > >Fixes suggested by Jay Lan >- check for tidstats before taking the mutex_lock in taskstats_exit_send() >- add back version information for struct taskstats > ><clip> > > struct taskstats { > >- /* Version 1 */ >+ /* Delay accounting fields start >+ * >+ * All values, until comment "Delay accounting fields end" are >+ * available only if delay accounting is enabled, even though the last >+ * few fields are not delays >+ * >+ * xxx_count is the number of delay values recorded >+ * xxx_delay_total is the corresponding cumulative delay in nanoseconds >+ * >+ * xxx_delay_total wraps around to zero on overflow >+ * xxx_count incremented regardless of overflow >+ */ >+ >+ /* Delay waiting for cpu, while runnable >+ * count, delay_total NOT updated atomically >+ */ >+ __u64 cpu_count; >+ __u64 cpu_delay_total; >+ >+ /* Following four fields atomically updated using task->delays->lock */ >+ >+ /* Delay waiting for synchronous block I/O to complete >+ * does not account for delays in I/O submission >+ */ >+ __u64 blkio_count; >+ __u64 blkio_delay_total; >+ >+ /* Delay waiting for page fault I/O (swap in only) */ >+ __u64 swapin_count; >+ __u64 swapin_delay_total; >+ >+ /* cpu "wall-clock" running time >+ * On some architectures, value will adjust for cpu time stolen >+ * from the kernel in involuntary waits due to virtualization. >+ * Value is cumulative, in nanoseconds, without a corresponding count >+ * and wraps around to zero silently on overflow >+ */ >+ __u64 cpu_run_real_total; >+ >+ /* cpu "virtual" running time >+ * Uses time intervals seen by the kernel i.e. no adjustment >+ * for kernel's involuntary waits due to virtualization. >+ * Value is cumulative, in nanoseconds, without a corresponding count >+ * and wraps around to zero silently on overflow >+ */ >+ __u64 cpu_run_virtual_total; >+ /* Delay accounting fields end */ >+ /* version 1 ends here */ >+ >+ /* version of taskstats */ >+ __u64 version; > Could you place the common field "version" before any acct subsystem specific fields? As a matter of fact, we do not need 'filler_avoids_empty_struct_warnings' in [patch 5/8] taskstats interface. Replacing that field with "version" would be great! Thanks, - jay > >- int filler_avoids_empty_struct_warnings; > }; > > > |
From: Balbir S. <ba...@in...> - 2006-05-04 04:26:50
|
On Wed, May 03, 2006 at 07:13:54PM -0700, Jay Lan wrote: > Balbir Singh wrote: > >Changelog > > > >Fixes suggested by Jay Lan > >- check for tidstats before taking the mutex_lock in taskstats_exit_send() > >- add back version information for struct taskstats > > > ><clip> > > > > struct taskstats { > > > >- /* Version 1 */ > >+ /* Delay accounting fields start > >+ * > >+ * All values, until comment "Delay accounting fields end" are > >+ * available only if delay accounting is enabled, even though the last > >+ * few fields are not delays > >+ * > >+ * xxx_count is the number of delay values recorded > >+ * xxx_delay_total is the corresponding cumulative delay in nanoseconds > >+ * > >+ * xxx_delay_total wraps around to zero on overflow > >+ * xxx_count incremented regardless of overflow > >+ */ > >+ > >+ /* Delay waiting for cpu, while runnable > >+ * count, delay_total NOT updated atomically > >+ */ > >+ __u64 cpu_count; > >+ __u64 cpu_delay_total; > >+ > >+ /* Following four fields atomically updated using task->delays->lock */ > >+ > >+ /* Delay waiting for synchronous block I/O to complete > >+ * does not account for delays in I/O submission > >+ */ > >+ __u64 blkio_count; > >+ __u64 blkio_delay_total; > >+ > >+ /* Delay waiting for page fault I/O (swap in only) */ > >+ __u64 swapin_count; > >+ __u64 swapin_delay_total; > >+ > >+ /* cpu "wall-clock" running time > >+ * On some architectures, value will adjust for cpu time stolen > >+ * from the kernel in involuntary waits due to virtualization. > >+ * Value is cumulative, in nanoseconds, without a corresponding count > >+ * and wraps around to zero silently on overflow > >+ */ > >+ __u64 cpu_run_real_total; > >+ > >+ /* cpu "virtual" running time > >+ * Uses time intervals seen by the kernel i.e. no adjustment > >+ * for kernel's involuntary waits due to virtualization. > >+ * Value is cumulative, in nanoseconds, without a corresponding count > >+ * and wraps around to zero silently on overflow > >+ */ > >+ __u64 cpu_run_virtual_total; > >+ /* Delay accounting fields end */ > >+ /* version 1 ends here */ > >+ > >+ /* version of taskstats */ > >+ __u64 version; > > > > Could you place the common field "version" before any acct subsystem > specific fields? > > As a matter of fact, we do not need > 'filler_avoids_empty_struct_warnings' in [patch 5/8] taskstats > interface. Replacing that field with "version" would be great! Yes, I thought about it and wanted to put it upfront. To maintain compatibility with the previous post, I decided to add it to the end. If putting the version right up helps the readability of taskstats.h, I can make the changes and repost the patches again. Thanks for your review, Balbir |
From: Balbir S. <ba...@in...> - 2006-05-04 20:30:08
|
Changelog Fixes suggested by Jay Lan - check for tidstats before taking the mutex_lock in taskstats_exit_send() - add back and fill version information for struct taskstats Fixes comments by akpm (on earlier patch now incorporated here) - detailed comments on atomicity rules of accounting fields - replace use of nsec_t delayacct-taskstats.patch Usage of taskstats interface by delay accounting. Signed-off-by: Shailabh Nagar <na...@us...> Signed-off-by: Balbir Singh <ba...@in...> --- include/linux/delayacct.h | 13 ++++++++++++ include/linux/taskstats.h | 49 ++++++++++++++++++++++++++++++++++++++++++++++ init/Kconfig | 1 kernel/delayacct.c | 42 +++++++++++++++++++++++++++++++++++++++ kernel/taskstats.c | 12 ++++++++++- 5 files changed, 116 insertions(+), 1 deletion(-) diff -puN include/linux/delayacct.h~delayacct-taskstats include/linux/delayacct.h --- linux-2.6.17-rc3/include/linux/delayacct.h~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530 +++ linux-2.6.17-rc3-balbir/include/linux/delayacct.h 2006-05-04 11:26:18.000000000 +0530 @@ -18,6 +18,7 @@ #define _LINUX_DELAYACCT_H #include <linux/sched.h> +#include <linux/taskstats_kern.h> /* * Per-task flags relevant to delay accounting @@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); extern void __delayacct_blkio_end(void); +extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); static inline void delayacct_set_flag(int flag) { @@ -74,6 +76,14 @@ static inline void delayacct_blkio_end(v __delayacct_blkio_end(); } +static inline int delayacct_add_tsk(struct taskstats *d, + struct task_struct *tsk) +{ + if (!tsk->delays) + return -EINVAL; + return __delayacct_add_tsk(d, tsk); +} + #else static inline void delayacct_set_flag(int flag) {} @@ -89,6 +99,9 @@ static inline void delayacct_blkio_start {} static inline void delayacct_blkio_end(void) {} +static inline int delayacct_add_tsk(struct taskstats *d, + struct task_struct *tsk) +{ return 0; } #endif /* CONFIG_TASK_DELAY_ACCT */ #endif diff -puN include/linux/taskstats.h~delayacct-taskstats include/linux/taskstats.h --- linux-2.6.17-rc3/include/linux/taskstats.h~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530 +++ linux-2.6.17-rc3-balbir/include/linux/taskstats.h 2006-05-04 09:34:11.000000000 +0530 @@ -35,6 +35,55 @@ struct taskstats { /* Version 1 */ __u64 version; + + /* Delay accounting fields start + * + * All values, until comment "Delay accounting fields end" are + * available only if delay accounting is enabled, even though the last + * few fields are not delays + * + * xxx_count is the number of delay values recorded + * xxx_delay_total is the corresponding cumulative delay in nanoseconds + * + * xxx_delay_total wraps around to zero on overflow + * xxx_count incremented regardless of overflow + */ + + /* Delay waiting for cpu, while runnable + * count, delay_total NOT updated atomically + */ + __u64 cpu_count; + __u64 cpu_delay_total; + + /* Following four fields atomically updated using task->delays->lock */ + + /* Delay waiting for synchronous block I/O to complete + * does not account for delays in I/O submission + */ + __u64 blkio_count; + __u64 blkio_delay_total; + + /* Delay waiting for page fault I/O (swap in only) */ + __u64 swapin_count; + __u64 swapin_delay_total; + + /* cpu "wall-clock" running time + * On some architectures, value will adjust for cpu time stolen + * from the kernel in involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_real_total; + + /* cpu "virtual" running time + * Uses time intervals seen by the kernel i.e. no adjustment + * for kernel's involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_virtual_total; + /* Delay accounting fields end */ + /* version 1 ends here */ }; diff -puN init/Kconfig~delayacct-taskstats init/Kconfig --- linux-2.6.17-rc3/init/Kconfig~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530 +++ linux-2.6.17-rc3-balbir/init/Kconfig 2006-05-04 09:31:59.000000000 +0530 @@ -164,6 +164,7 @@ config TASKSTATS config TASK_DELAY_ACCT bool "Enable per-task delay accounting (EXPERIMENTAL)" + depends on TASKSTATS help Collect information on time spent by a task waiting for system resources like cpu, synchronous block I/O completion and swapping diff -puN kernel/delayacct.c~delayacct-taskstats kernel/delayacct.c --- linux-2.6.17-rc3/kernel/delayacct.c~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530 +++ linux-2.6.17-rc3-balbir/kernel/delayacct.c 2006-05-04 11:26:18.000000000 +0530 @@ -104,3 +104,45 @@ void __delayacct_blkio_end(void) ¤t->delays->blkio_delay, ¤t->delays->blkio_count); } + +int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) +{ + s64 tmp; + struct timespec ts; + unsigned long t1,t2,t3; + + tmp = (s64)d->cpu_run_real_total; + tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC; + d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; + + /* + * No locking available for sched_info (and too expensive to add one) + * Mitigate by taking snapshot of values + */ + t1 = tsk->sched_info.pcnt; + t2 = tsk->sched_info.run_delay; + t3 = tsk->sched_info.cpu_time; + + d->cpu_count += t1; + + jiffies_to_timespec(t2, &ts); + tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts); + d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; + + tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000; + d->cpu_run_virtual_total = + (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; + + /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ + + spin_lock(&tsk->delays->lock); + tmp = d->blkio_delay_total + tsk->delays->blkio_delay; + d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; + tmp = d->swapin_delay_total + tsk->delays->swapin_delay; + d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; + d->blkio_count += tsk->delays->blkio_count; + d->swapin_count += tsk->delays->swapin_count; + spin_unlock(&tsk->delays->lock); + + return 0; +} diff -puN kernel/taskstats.c~delayacct-taskstats kernel/taskstats.c --- linux-2.6.17-rc3/kernel/taskstats.c~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530 +++ linux-2.6.17-rc3-balbir/kernel/taskstats.c 2006-05-04 11:27:53.000000000 +0530 @@ -18,6 +18,7 @@ #include <linux/kernel.h> #include <linux/taskstats_kern.h> +#include <linux/delayacct.h> #include <net/genetlink.h> #include <asm/atomic.h> @@ -120,7 +121,10 @@ static int fill_pid(pid_t pid, struct ta * goto err; */ -err: + rc = delayacct_add_tsk(stats, tsk); + stats->version = TASKSTATS_VERSION; + + /* Define err: label here if needed */ put_task_struct(tsk); return rc; @@ -152,8 +156,14 @@ static int fill_tgid(pid_t tgid, struct * break; */ + rc = delayacct_add_tsk(stats, tsk); + if (rc) + break; + } while_each_thread(first, tsk); read_unlock(&tasklist_lock); + stats->version = TASKSTATS_VERSION; + /* * Accounting subsytems can also add calls here if they don't _ |
From: Thomas G. <tg...@li...> - 2006-05-09 09:46:34
|
On Fri, 2006-05-05 at 00:14 +0530, Balbir Singh wrote: > #else > static inline void delayacct_set_flag(int flag) > {} Please make this static inline void delayacct_set_flag(int flag) { } Same in all other files > @@ -89,6 +99,9 @@ static inline void delayacct_blkio_start > {} > static inline void delayacct_blkio_end(void) > {} > +static inline int delayacct_add_tsk(struct taskstats *d, > + struct task_struct *tsk) > +{ return 0; } { return 0; } > + * > + * xxx_count is the number of delay values recorded > + * xxx_delay_total is the corresponding cumulative delay in nanoseconds > + * > + * xxx_delay_total wraps around to zero on overflow > + * xxx_count incremented regardless of overflow > + */ Please use a structure for that. struct delay_account { ktime_t delay; u64 count; }; Also instead of having tons of fields added, please use something like enum { CPU_ACCT, BLKIO_ACCT, ...., MAX_ACCT_TYPES }; struct delay_account stats[MAX_ACCT_TYPES]; > + /* Delay waiting for cpu, while runnable > + * count, delay_total NOT updated atomically > + */ > + __u64 cpu_count; > + __u64 cpu_delay_total; > + /* Following four fields atomically updated using task->delays->lock */ > + > + /* Delay waiting for synchronous block I/O to complete > + * does not account for delays in I/O submission > + */ > + __u64 blkio_count; > + __u64 blkio_delay_total; > + > + /* Delay waiting for page fault I/O (swap in only) */ > + __u64 swapin_count; > + __u64 swapin_delay_total; > + > + /* cpu "wall-clock" running time > + * On some architectures, value will adjust for cpu time stolen > + * from the kernel in involuntary waits due to virtualization. > + * Value is cumulative, in nanoseconds, without a corresponding count > + * and wraps around to zero silently on overflow When will this overflow happen ? 2^64 nsec ~= 584 years > +int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) > +{ Please add some comment what this code does. > + s64 tmp; > + struct timespec ts; > + unsigned long t1,t2,t3; > + > + tmp = (s64)d->cpu_run_real_total; > + tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC; > + d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; > + > + /* > + * No locking available for sched_info (and too expensive to add one) > + * Mitigate by taking snapshot of values > + */ > + t1 = tsk->sched_info.pcnt; > + t2 = tsk->sched_info.run_delay; > + t3 = tsk->sched_info.cpu_time; > + > + d->cpu_count += t1; > + > + jiffies_to_timespec(t2, &ts); > + tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts); > + d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; > + > + tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000; > + d->cpu_run_virtual_total = > + (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; > + > + /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ > + > + spin_lock(&tsk->delays->lock); > + tmp = d->blkio_delay_total + tsk->delays->blkio_delay; > + d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; I really do not get whatfor all these comparisons are. How can those values get > 2^63 within a reasonable timeframe ? > + tmp = d->swapin_delay_total + tsk->delays->swapin_delay; > + d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; > + d->blkio_count += tsk->delays->blkio_count; > + d->swapin_count += tsk->delays->swapin_count; > + spin_unlock(&tsk->delays->lock); > + return 0; > +} > diff -puN kernel/taskstats.c~delayacct-taskstats kernel/taskstats.c > --- linux-2.6.17-rc3/kernel/taskstats.c~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530 > +++ linux-2.6.17-rc3-balbir/kernel/taskstats.c 2006-05-04 11:27:53.000000000 +0530 > @@ -18,6 +18,7 @@ > > #include <linux/kernel.h> > #include <linux/taskstats_kern.h> > +#include <linux/delayacct.h> > #include <net/genetlink.h> > #include <asm/atomic.h> > > @@ -120,7 +121,10 @@ static int fill_pid(pid_t pid, struct ta > * goto err; > */ > > -err: > + rc = delayacct_add_tsk(stats, tsk); > + stats->version = TASKSTATS_VERSION; > + > + /* Define err: label here if needed */ Please remove that comment. The err label goes to a place where it makes sense. > put_task_struct(tsk); > return rc; > > @@ -152,8 +156,14 @@ static int fill_tgid(pid_t tgid, struct > * break; > */ > > + rc = delayacct_add_tsk(stats, tsk); > + if (rc) > + break; > + > } while_each_thread(first, tsk); > read_unlock(&tasklist_lock); > + stats->version = TASKSTATS_VERSION; > + > > /* > * Accounting subsytems can also add calls here if they don't > _ > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to maj...@vg... > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ |
From: Balbir S. <ba...@in...> - 2006-05-09 17:15:19
|
On Tue, May 09, 2006 at 11:46:48AM +0000, Thomas Gleixner wrote: > On Fri, 2006-05-05 at 00:14 +0530, Balbir Singh wrote: > > > #else > > static inline void delayacct_set_flag(int flag) > > {} > > Please make this > > static inline void delayacct_set_flag(int flag) { } > > Same in all other files Will do. > > > @@ -89,6 +99,9 @@ static inline void delayacct_blkio_start > > {} > > static inline void delayacct_blkio_end(void) > > {} > > +static inline int delayacct_add_tsk(struct taskstats *d, > > + struct task_struct *tsk) > > +{ return 0; } > > { > return 0; > } > > > + * > > + * xxx_count is the number of delay values recorded > > + * xxx_delay_total is the corresponding cumulative delay in nanoseconds > > + * > > + * xxx_delay_total wraps around to zero on overflow > > + * xxx_count incremented regardless of overflow > > + */ > > Please use a structure for that. > > struct delay_account { > ktime_t delay; > u64 count; > }; > > Also instead of having tons of fields added, please use something like > > enum { > CPU_ACCT, > BLKIO_ACCT, > ...., > MAX_ACCT_TYPES > }; > struct delay_account stats[MAX_ACCT_TYPES]; > Yes, this would make sense if all the fields were of type delay_account. currently, only CPU, BLKIO and SWAPIN are of that type. The new fields that will be added to this structure (since this is a part of a statistics interface now) could potentially belong to other subsystems. > > > + /* Delay waiting for cpu, while runnable > > + * count, delay_total NOT updated atomically > > + */ > > + __u64 cpu_count; > > + __u64 cpu_delay_total; > > + /* Following four fields atomically updated using task->delays->lock */ > > + > > + /* Delay waiting for synchronous block I/O to complete > > + * does not account for delays in I/O submission > > + */ > > + __u64 blkio_count; > > + __u64 blkio_delay_total; > > + > > + /* Delay waiting for page fault I/O (swap in only) */ > > + __u64 swapin_count; > > + __u64 swapin_delay_total; > > + > > + /* cpu "wall-clock" running time > > + * On some architectures, value will adjust for cpu time stolen > > + * from the kernel in involuntary waits due to virtualization. > > + * Value is cumulative, in nanoseconds, without a corresponding count > > + * and wraps around to zero silently on overflow > > When will this overflow happen ? 2^64 nsec ~= 584 years > Detecting a potential overflow is not a bad thing IMHO. Currently the existing data type makes this overflow very rare. > > > +int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) > > +{ > > Please add some comment what this code does. Will do > > > + s64 tmp; > > + struct timespec ts; > > + unsigned long t1,t2,t3; > > + > > + tmp = (s64)d->cpu_run_real_total; > > + tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC; > > + d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; > > + > > + /* > > + * No locking available for sched_info (and too expensive to add one) > > + * Mitigate by taking snapshot of values > > + */ > > + t1 = tsk->sched_info.pcnt; > > + t2 = tsk->sched_info.run_delay; > > + t3 = tsk->sched_info.cpu_time; > > + > > + d->cpu_count += t1; > > + > > + jiffies_to_timespec(t2, &ts); > > + tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts); > > + d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; > > + > > + tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000; > > + d->cpu_run_virtual_total = > > + (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; > > + > > + /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ > > + > > + spin_lock(&tsk->delays->lock); > > + tmp = d->blkio_delay_total + tsk->delays->blkio_delay; > > + d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; > > I really do not get whatfor all these comparisons are. How can those > values get > 2^63 within a reasonable timeframe ? Its defensive programming, granted that it is probably not required. > > > + tmp = d->swapin_delay_total + tsk->delays->swapin_delay; > > + d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; > > + d->blkio_count += tsk->delays->blkio_count; > > + d->swapin_count += tsk->delays->swapin_count; > > + spin_unlock(&tsk->delays->lock); > > + return 0; > > +} > > diff -puN kernel/taskstats.c~delayacct-taskstats kernel/taskstats.c > > --- linux-2.6.17-rc3/kernel/taskstats.c~delayacct-taskstats 2006-05-04 09:31:59.000000000 +0530 > > +++ linux-2.6.17-rc3-balbir/kernel/taskstats.c 2006-05-04 11:27:53.000000000 +0530 > > @@ -18,6 +18,7 @@ > > > > #include <linux/kernel.h> > > #include <linux/taskstats_kern.h> > > +#include <linux/delayacct.h> > > #include <net/genetlink.h> > > #include <asm/atomic.h> > > > > @@ -120,7 +121,10 @@ static int fill_pid(pid_t pid, struct ta > > * goto err; > > */ > > > > -err: > > + rc = delayacct_add_tsk(stats, tsk); > > + stats->version = TASKSTATS_VERSION; > > + > > + /* Define err: label here if needed */ > > Please remove that comment. The err label goes to a place where it makes > sense. Will fix this. > > > put_task_struct(tsk); > > return rc; > > > > @@ -152,8 +156,14 @@ static int fill_tgid(pid_t tgid, struct > > * break; > > */ > > > > + rc = delayacct_add_tsk(stats, tsk); > > + if (rc) > > + break; > > + > > } while_each_thread(first, tsk); > > read_unlock(&tasklist_lock); > > + stats->version = TASKSTATS_VERSION; > > + > > > > /* > > * Accounting subsytems can also add calls here if they don't > > _ > > - > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > > the body of a message to maj...@vg... > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > Please read the FAQ at http://www.tux.org/lkml/ Thanks for your review, Balbir Singh, Linux Technology Center, IBM Software Labs |