From: Adam L. <ag...@us...> - 2003-07-18 18:29:42
|
Kernprof provides, among other things, call graph support which enables us to see which function calls (arcs) have been generated within any given time period. Unfortunately, kernprof is a very large patch which makes it difficult to maintain and integrate with other patches. I am working on a smaller patch that generates only function call counts using mcount() just like kernprof. This should boot as-is because none of the source files are built with the -pg option. For the moment you must manually select which objects are profiled by recompiling them with -pg. I have had best results with net/socket.o for testing. When any of the source files are compiled with -pg, an entry /proc/mcount will appear which contains the raw hash table of arcs. This file is read by a userland tool (in development) to generate human-readable output. There are two serious problems with this patch so far: (1) Due to some shady assembly stubs in arch/i386/lib/mcount.S, the kernel will die during boot if all files are compiled with -pg. There must be an issue with the way mcount is called that does not play nicely with some functions. (2) The code in cg_record_arc() seems to be generating the hash structure correctly, but reads from the proc entry are returning inconsistent data. If you could take a look and respond with comments/suggestions/flames I'd really appreciate it. 
--------- snip --------- diff -Naur linux-2.5.73-clean/arch/i386/kernel/i386_ksyms.c linux-2.5.73-test/arch/i386/kernel/i386_ksyms.c --- linux-2.5.73-clean/arch/i386/kernel/i386_ksyms.c 2003-06-22 11:33:35.000000000 -0700 +++ linux-2.5.73-test/arch/i386/kernel/i386_ksyms.c 2003-07-18 05:34:20.000000000 -0700 @@ -187,6 +187,9 @@ EXPORT_SYMBOL_NOVERS(memcpy); EXPORT_SYMBOL_NOVERS(memset); +extern void mcount(void); +EXPORT_SYMBOL_NOVERS(mcount); + #ifdef CONFIG_HAVE_DEC_LOCK EXPORT_SYMBOL(atomic_dec_and_lock); #endif diff -Naur linux-2.5.73-clean/arch/i386/kernel/Makefile linux-2.5.73-test/arch/i386/kernel/Makefile --- linux-2.5.73-clean/arch/i386/kernel/Makefile 2003-06-22 11:32:35.000000000 -0700 +++ linux-2.5.73-test/arch/i386/kernel/Makefile 2003-07-18 05:34:20.000000000 -0700 @@ -31,6 +31,7 @@ obj-$(CONFIG_MODULES) += module.o obj-y += sysenter.o vsyscall.o obj-$(CONFIG_ACPI_SRAT) += srat.o +obj-y += mcount.o EXTRA_AFLAGS := -traditional diff -Naur linux-2.5.73-clean/arch/i386/kernel/mcount.c linux-2.5.73-test/arch/i386/kernel/mcount.c --- linux-2.5.73-clean/arch/i386/kernel/mcount.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.5.73-test/arch/i386/kernel/mcount.c 2003-07-18 05:34:20.000000000 -0700 @@ -0,0 +1,291 @@ +#include <linux/kernel.h> +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/spinlock.h> +#include <linux/smp.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/proc_fs.h> +#include <asm/uaccess.h> + +#define DFL_PC_RES 4 /* default PC resolution for this platform */ +#define CG_MAX_ARCS (1 << (8 * sizeof(short))) +#define FUNCTIONPC(func) (*(unsigned long *)&(func)) +void UNKNOWN_KERNEL(void) {} /* Dummy functions to make profiles more */ +void UNKNOWN_MODULE(void) {} /* descriptive */ + +extern char _stext, _etext, _sinittext, _einittext; +#if 0 +#define pc_out_of_range(pc) \ + ((pc) < (unsigned long) &_stext || (pc) >= (unsigned long) &_etext) +#endif +#define 
pc_out_of_range(pc) \ + ((pc) < (unsigned long) &_stext || \ + (pc) >= (unsigned long) &_einittext) + +#if CPU != 386 +/* Like the above but also returns the result */ +static __inline__ int atomic_add_return(int i, atomic_t *v) +{ + register int oldval; + __asm__ __volatile__( + LOCK "xaddl %2,%0" + :"=m" (v->counter), "=r" (oldval) + :"1" (i), "m" (v->counter) : "memory"); + return oldval + i; +} +#endif + +struct cg_arc_dest { + unsigned long address; + atomic_t count; + unsigned short link; + unsigned short pad; +}; + +unsigned int mcount_shift, PC_resolution = DFL_PC_RES; +unsigned long total_mcount[NR_CPUS]; +unsigned long lost_ones[NR_CPUS]; +//unsigned long module_start, module_end; + +struct prof_mem_map +{ + unsigned long kernel_buckets; /* number of kernel buckets */ + unsigned long module_buckets; /* number of module buckets */ + unsigned long nr_cpus; /* number of processors whether profiled or not */ + unsigned long cg_from_size; /* size of one cg_from array */ + unsigned long cg_to_size; /* size of one cg_to array */ + unsigned long cg_to_offset; /* offset of cg_to array */ + unsigned long kernel_start; /* lowest text address in kernel */ + unsigned long kernel_end; /* highest text address in kernel */ + unsigned long module_start; /* lowest text address in all modules */ + unsigned long module_end; /* highest text address in all modules */ +} *memory_map; + +char* memory_start = NULL; +unsigned short *cg_from_base = NULL; +struct cg_arc_dest *cg_to_base = NULL; +size_t cg_from_size, cg_to_size; +int cg_arc_overflow; /* set when no new arcs can be added to the call graph */ +int n_buckets = 0; +size_t mem_needed; /* space needed for the call graph and the PC samples */ +int mcount_enabled = 0; /* -1: disabled, 0: ready (uninitialized), 1: ready*/ +struct proc_dir_entry *mcount_ent; + +ssize_t mcount_read(struct file *filp, char *buff, + size_t count, loff_t *offp) { + int bytes_left, i; + + printk("Reading from file offset %i\n", *offp); + bytes_left 
= mem_needed - *offp; + if(count > bytes_left) + i = bytes_left; + else + i = count; + if(copy_to_user(buff, memory_start + *offp, i)) { + printk("copy_to_user failed!\n"); + return -EFAULT; + } + *offp += i; + return i; +} + +ssize_t mcount_write(struct file *filp, const char *buf, size_t count, loff_t +*offp) { +/* if(cg_from_base) { + memset(cg_from_base, 0, mem_needed); + return count; + } + return -ENODEV; */ + unsigned long *p; + for(p = (unsigned long)memory_start; (void *)p < (void *)cg_from_base; ++p) + printk("%p: %lu\n", p, *p); + printk("cg_to_offset should be: %p - %p = %lu\n", cg_to_base, cg_from_base, (unsigned long)&cg_to_base - (unsigned long)cg_from_base); + return count; +} + +static struct file_operations mcount_fops = { + read: mcount_read, + write: mcount_write, +}; + + +static int mcount_alloc_mem(void) +{ + //char *p; + unsigned long cg_from_size, cg_to_size; + int i; + size_t text_size = (unsigned long) &_einittext - (unsigned long) &_stext; + for(i=0; i<100; ++i) + //printk("Entering mcount_alloc_mem...\n"); + for (mcount_shift = 0; (1 << mcount_shift) < PC_resolution; mcount_shift++); + //printk("Using mcount_shift = %u\n", mcount_shift); + n_buckets = text_size >> mcount_shift; + cg_from_size = n_buckets * sizeof(short); + cg_to_size = CG_MAX_ARCS * sizeof(struct cg_arc_dest); + mem_needed = sizeof(struct prof_mem_map) + + (cg_from_size + cg_to_size) + * num_online_cpus(); + if ((memory_start = vmalloc(mem_needed)) == NULL) { + //printk("Failed to allocate memory!\n"); + return -ENOMEM; + } + memory_map = (struct prof_mem_map*) memory_start; + memory_map->cg_from_size = cg_from_size; + memory_map->cg_to_size = cg_to_size; + + cg_from_base = (unsigned short *) (memory_start + sizeof(struct prof_mem_map)); + cg_to_base = (struct cg_arc_dest *) (&cg_from_base + memory_map->cg_from_size * num_online_cpus()); + //printk("n_buckets = %i\ncg_from_sz = %lu\ncg_to_sz = %lu\n \ + // mem_needed = %lu\ncg_from_base = %p\ncg_to_base = %p\n", \ + // 
n_buckets, /*memory_map.*/cg_from_size, /*memory_map.*/cg_to_size, \ + // mem_needed, cg_from_base, cg_to_base); + //printk("Kernel text [%p - %p]\n", &_stext, &_etext); + printk("Memory allocated at [%lu - %lu]\n", memory_start, memory_start + mem_needed); + /* Fill the memory map structure for the userland tool */ + memory_map->kernel_buckets = n_buckets; + memory_map->nr_cpus = num_online_cpus(); + memory_map->cg_to_offset = memory_map->cg_from_size * memory_map->nr_cpus; + memory_map->kernel_start = (unsigned long) &_stext; + memory_map->kernel_end = (unsigned long) &_einittext; + + return 0; +} + +static void mcount_free_mem(void) +{ + kfree(cg_from_base); +} + +int mcount_init() +{ + if(mcount_alloc_mem()) + return -ENOMEM; + mcount_ent = create_proc_entry("mcount", 0, 0); + if(!mcount_ent) + return -ENODEV; + mcount_ent->size = mem_needed; + mcount_ent->proc_fops = &mcount_fops; + mcount_enabled = 1; + return 0; +} + +void mcount_cleanup() +{ + mcount_enabled = -1; + mcount_free_mem(); + return; +} + +/* Record an arc traversal in the call graph. Called by mcount(). 
SMP safe */ +void cg_record_arc(unsigned long frompc, unsigned long selfpc) +{ +#ifndef __HAVE_ARCH_CMPXCHG16 + static spinlock_t cg_record_lock = SPIN_LOCK_UNLOCKED; + unsigned long flags; +#endif + int toindex; + int fromindex; + int cpu; + unsigned short *q; + struct cg_arc_dest *p; + unsigned short *cg_from; + struct cg_arc_dest *cg_to; + + if(selfpc < &_stext || selfpc > &_etext) + return; + else if((!mcount_enabled) && mcount_init()) + return; + //printk("Entered cg_record_arc...\n"); + //printk("frompc = %p\n selfpc = %p\n", frompc, selfpc); + cpu = smp_processor_id(); + /*if (!cpu_prof_enabled[cpu]) + return;*/ + total_mcount[cpu]++; + + cg_from = (u_short *)(((char *)cg_from_base) + memory_map->cg_from_size * cpu); + //printk("cg_from = %p\n", cg_from); + cg_to = &cg_to_base[CG_MAX_ARCS * cpu]; + if (pc_out_of_range(frompc)) + { + //printk("pc out of range\n"); + //printk("unknown kernel = %p\n", FUNCTIONPC(UNKNOWN_KERNEL)); + //if (frompc >= memory_map.module_start && frompc < memory_map.module_end) + //fromindex = (FUNCTIONPC(UNKNOWN_MODULE) - (unsigned long)&_stext) >> mcount_shift; + //else + fromindex = (FUNCTIONPC(UNKNOWN_KERNEL) - (unsigned long)&_stext) >> mcount_shift; + } + else + fromindex = (frompc - (unsigned long) &_stext) >> mcount_shift; + //printk("fromindex = %i\n", fromindex); + q = &cg_from[fromindex]; + + //printk("q = %p\n", q); + //printk("*q = %i\n", *q); + /* Easy case: the arc is already in the call graph */ + //printk("Begin [for] traversal of call graph...\n"); + for (toindex = *q; toindex != 0; ) { + p = &cg_to[toindex]; + //printk("p = %p\n", p); + if (p->address == selfpc) { + //printk("*** arc present - incrementing...\n"); + atomic_inc(&p->count); + return; + } + toindex = p->link; + } + //printk("Completed for traversal\n"); + /* + * No luck. We need to add a new arc. Since cg_to[0] is unused, + * we use cg_to[0].count to keep track of the next available arc. 
+ */ + if (cg_arc_overflow) + { + lost_ones[cpu]++; + //printk("*** arc overflow - ignoring...\n"); + return; + } + //printk("Adding one to count@[%p]->count...", cg_to); + toindex = atomic_add_return(1, &cg_to->count); + //printk("done.\n"); + if (toindex >= CG_MAX_ARCS) { + /* + * We have run out of space for arcs. We'll keep incrementing + * the existing ones but we won't try to add any more. + */ + lost_ones[cpu]++; + cg_arc_overflow = 1; + atomic_set(&cg_to->count, CG_MAX_ARCS - 1); + //printk("*** Ran out of space - setting arc overflow...\n"); + return; + } + //printk("No overflow condition.\n"); + /* + * We have a secured slot for a new arc and all we need to do is + * initialize it and add it to a hash bucket. We use compare&swap, if + * possible, to avoid any spinlocks whatsoever. + */ + //printk("Adding new arc...\n"); + p = &cg_to[toindex]; + p->address = selfpc; + atomic_set(&p->count, 1); +#ifdef __HAVE_ARCH_CMPXCHG16 + //printk("Using CMPXCHG16\n"); + do { + p->link = *q; + } while (cmpxchg(q, p->link, toindex) != p->link);//else +#else + //printk("Not using CMPXCHG16\n"); + spin_lock_irqsave(&cg_record_lock, flags); + p->link = *q; + *q = toindex; + spin_unlock_irqrestore(&cg_record_lock, flags); +#endif + //printk("*** Added new arc...\n"); + printk("%p -> %p\n", frompc, p->address); + return; +} + +void (*mcount_hook)(unsigned long, unsigned long) = cg_record_arc; + diff -Naur linux-2.5.73-clean/arch/i386/kernel/semaphore.c linux-2.5.73-test/arch/i386/kernel/semaphore.c --- linux-2.5.73-clean/arch/i386/kernel/semaphore.c 2003-06-22 11:32:42.000000000 -0700 +++ linux-2.5.73-test/arch/i386/kernel/semaphore.c 2003-07-18 05:34:20.000000000 -0700 @@ -191,6 +191,7 @@ ".align 4\n" ".globl __down_failed\n" "__down_failed:\n\t" +// "call mcount_stext_lock\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" @@ -214,6 +215,7 @@ ".align 4\n" ".globl __down_failed_interruptible\n" "__down_failed_interruptible:\n\t" +// "call 
mcount_stext_lock\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" @@ -235,6 +237,7 @@ ".align 4\n" ".globl __down_failed_trylock\n" "__down_failed_trylock:\n\t" +// "call mcount_stext_lock\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" @@ -256,6 +259,7 @@ ".align 4\n" ".globl __up_wakeup\n" "__up_wakeup:\n\t" +// "call mcount_stext_lock\n\t" "pushl %eax\n\t" "pushl %edx\n\t" "pushl %ecx\n\t" diff -Naur linux-2.5.73-clean/arch/i386/lib/Makefile linux-2.5.73-test/arch/i386/lib/Makefile --- linux-2.5.73-clean/arch/i386/lib/Makefile 2003-06-22 11:32:31.000000000 -0700 +++ linux-2.5.73-test/arch/i386/lib/Makefile 2003-07-18 05:34:20.000000000 -0700 @@ -10,3 +10,4 @@ lib-$(CONFIG_X86_USE_3DNOW) += mmx.o lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o lib-$(CONFIG_DEBUG_IOVIRT) += iodebug.o +lib-y += mcount.o diff -Naur linux-2.5.73-clean/arch/i386/lib/mcount.S linux-2.5.73-test/arch/i386/lib/mcount.S --- linux-2.5.73-clean/arch/i386/lib/mcount.S 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.5.73-test/arch/i386/lib/mcount.S 2003-07-18 05:34:20.000000000 -0700 @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2000 SGI + * + * Written by Dimitris Michailidis dim...@sg... + * + * This file implements mcount(), which is used to collect profiling data. + * We provide several variants to accomodate different types of callers at + * the lowest possible overhead. + */ + +#include <linux/config.h> +#include <linux/linkage.h> + +#define MCOUNT_HEAD \ + pushl %ecx /* We must protect the arguments of FASTCALLs */; \ + movl mcount_hook, %ecx; \ + testl %ecx, %ecx; \ + jz 1f; \ + pushl %eax; \ + pushl %edx; \ + movl 12(%esp), %eax; /* mcount()'s parent */ \ + pushl %eax; \ + +#define MCOUNT_TAIL \ + call *%ecx; \ + popl %eax; \ + popl %edx; \ + popl %eax; \ + popl %eax; \ +1: popl %ecx + +/* + * This is the main variant and is called by C code. GCC's -pg option + * automatically instruments every C function with a call to this. 
+ */ +ENTRY(mcount) + MCOUNT_HEAD + movl 4(%ebp), %eax /* mcount()'s parent's parent */ + pushl %eax + MCOUNT_TAIL + ret + +/* + * This variant is used by assembly functions. Must be inserted by hand. + */ +ENTRY(mcount_asm) + //MCOUNT_HEAD + //movl 16(%esp), %eax /* mcount()'s parent's parent */ + //MCOUNT_TAIL + ret +/* + * This variant is used by assembly functions in section .stext.lock. + * Must be inserted by hand. + */ +ENTRY(mcount_stext_lock) + //MCOUNT_HEAD + //movl 16(%esp), %eax /* mcount()'s parent's parent */ + //addl 1(%eax), %eax /* this and the next lines are magic */ + //leal 5(%eax), %eax + //MCOUNT_TAIL + ret diff -Naur linux-2.5.73-clean/Makefile linux-2.5.73-test/Makefile --- linux-2.5.73-clean/Makefile 2003-06-22 11:32:58.000000000 -0700 +++ linux-2.5.73-test/Makefile 2003-07-18 05:35:18.000000000 -0700 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 5 SUBLEVEL = 73 -EXTRAVERSION = +EXTRAVERSION = +acg # *DOCUMENTATION* # To see a list of typical targets execute "make help" @@ -207,7 +207,7 @@ CFLAGS_MODULE = $(MODFLAGS) AFLAGS_MODULE = $(MODFLAGS) LDFLAGS_MODULE = -r -CFLAGS_KERNEL = +#CFLAGS_KERNEL = -pg AFLAGS_KERNEL = NOSTDINC_FLAGS = -nostdinc -iwithprefix include @@ -304,10 +304,6 @@ -include .config.cmd -ifndef CONFIG_FRAME_POINTER -CFLAGS += -fomit-frame-pointer -endif - # # INSTALL_PATH specifies where to place the updated kernel and system map # images. Uncomment if you want to place them anywhere other than root. |