Sorry for the delay,

As for building, my distro is "testing Debian system"
For my oprofile, I just did a default
./configure
make
make install

I did another test run.  Here's the results:
I disabled kernel profiling:
CHOSEN_EVENTS_0=GLOBAL_POWER
_EVENTS:100000:1:0:1
NR_CHOSEN=1
SEPARATE_LIB=0
SEPARATE_KERNEL=0
SEPARATE_THREAD=1
SEPARATE_CPU=0
VMLINUX=none
IMAGE_FILTER=/usr/local/bin/oprofiled
BUF_SIZE=65536
CPU_BUF_SIZE=0
CALLGRAPH=0
KERNEL_RANGE=c0100000,c02efb3f
XENIMAGE=none


I started oprofiled
ran my watch command:
watch --interval=2 "opcontrol --dump; opreport -d -n -l /usr/local/bin/oprofiled ; opcontrol --reset"
took longer for the CPU to get high

32010 root      25   0 29072  27m  416 R 86.4  3.1   8:21.82 oprofiled
 5759 indy      15   0  178m  85m  22m R  5.0  9.6   3:09.95 firefox-bin
 5585 root      15   0  105m  65m 6056 S  2.3  7.4   1:24.61 Xorg
 5339 root      24   0  3128 1604 1052 S  0.7  0.2   0:00.02 opcontrol
 5699 indy      15   0 26960  12m  10m S  0.7  1.4   0:01.62 kwin
 5732 indy      15   0 31808  16m  12m S  0.7  1.9    0: 24.29 konsole
 1045 root      15   0  2228 1156  856 R  0.3  0.1   0:02.09 top

I took a opreport and opannotate, it looks like that there are a large amount of samples for the do_match function call processing (frame push and register push on the stack) because it's called so often.  Propose it to be inlined?  Why is it called so often?  Is there something wrong?

root@xerces:/tmp/an# opreport -l /usr/local/bin/oprofiled; opannotate /usr/local/bin/oprofiled --source -i do_match,sfile_find
CPU: P4 / Xeon with 2 hyper-threads, speed 3000.12 MHz (estimated)
Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 100000
samples  %        symbol name
11640    94.1519  do_match
681       5.5084  sfile_find
13        0.1052  opd_process_samples
11        0.0890  pop_buffer_value
4         0.0324  odb_update_node
4         0.0324  sfile_get
4         0.0324  sfile_log_sample
2         0.0162  enough_remaining
2         0.0162  get_file
2         0.0162  odb_open_count


               :static int
               :do_match(struct sfile const * sf, cookie_t cookie, cookie_t app_cookie,
               :         struct kernel_image const * ki, struct anon_mapping const * anon,
               :         pid_t tgid, pid_t tid, unsigned int cpu)
 10251 83.1994 :{ /* do_match total:  11640 94.4729 */
               :        /* this is a simplified check for "is a kernel image" AND
               :         * "is the right kernel image". Also handles no-vmlinux
               :         * correctly.
               :         */
     1  0.0081 :        if (sf->kernel != ki)
               :                return 0;
               :
    67  0.5438 :        if (separate_thread) {
  1124  9.1226 :                if (sf->tid != tid || sf->tgid != tgid)
               :                        return 0;
               :        }
               :
               :        if (separate_cpu) {
               :                if (sf->cpu != cpu)
               :                        return 0;
               :        }
               :
     1  0.0081 :        if (separate_kernel || ((anon || separate_lib) && !ki)) {
               :                if (sf->app_cookie != app_cookie)
               :                        return 0;
               :        }
               :
               :        /* ignore the cached trans->cookie for kernel images,
               :         * it's meaningless and we checked all others already
               :         */
               :        if (ki)
               :                return 1;
               :
               :        if (sf->anon != anon)
               :                return 0;
               :
    63  0.5113 :        return sf->cookie == cookie;
   133  1.0795 :}

               :struct sfile * sfile_find(struct transient const * trans)
     1  0.0081 :{ /* sfile_find total:    681  5.5271 */
               :        struct sfile * sf;
               :        struct list_head * pos;
               :        struct kernel_image * ki = NULL;
               :        unsigned long hash;
               :
               :        if (trans->tracing != TRACING_ON) {
               :                opd_stats[OPD_SAMPLES]++;
     1  0.0081 :                opd_stats[trans->in_kernel == 1 ? OPD_KERNEL : OPD_PROCESS]++;
               :        }
               :
               :        /* There is a small race where this *can* happen, see
               :         * caller of cpu_buffer_reset() in the kernel
               :         */
               :        if (trans->in_kernel == -1) {
               :                verbprintf(vsamples, "Losing sample at 0x%llx of unknown provenance.\n",
               :                           trans->pc);
               :                opd_stats[OPD_NO_CTX]++;
               :                return NULL;
               :        }
               :
               :        /* we might need a kernel image start/end to hash on */
               :        if (trans->in_kernel) {
               :                ki = find_kernel_image(trans);
               :                if (!ki) {
               :                        verbprintf(vsamples, "Lost kernel sample %llx\n", trans->pc);
               :                        opd_stats[OPD_LOST_KERNEL]++;
               :                        return NULL;
               :                }
               :        } else if (trans->cookie == NO_COOKIE && !trans->anon) {
               :                if (vsamples) {
               :                        char const * app = verbose_cookie(trans->app_cookie);
               :                        printf("No anon map for pc %llx, app %s.\n",
               :                               trans->pc, app);
               :                }
               :                opd_stats[OPD_LOST_NO_MAPPING]++;
               :                return NULL;
               :        }
               :
               :        hash = sfile_hash(trans, ki);
   241  1.9560 :        list_for_each(pos, &hashes[hash]) {
    23  0.1867 :                sf = list_entry(pos, struct sfile, hash);
               :                if (trans_match(trans, sf, ki)) {
               :                        sfile_get(sf);
               :                        goto lru;
               :                }
               :        }
               :
               :        sf = create_sfile(hash, trans, ki);
               :        list_add(&sf->hash, &hashes[hash]);
               :
               :lru:
               :        sfile_put(sf);
               :        return sf;
     1  0.0081 :}


I stopped the process with GDB:
do_match (sf=0x9e6afe8, cookie=4148450104, app_cookie=4148844408, ki=0x0, anon=0x0, tgid=26859, tid=26859, cpu=0) at opd_sfile.c:84
84      {
(gdb) bt
#0  do_match (sf=0x9e6afe8, cookie=4148450104, app_cookie=4148844408, ki=0x0, anon=0x0, tgid=26859, tid=26859, cpu=0) at opd_sfile.c:84
#1  0x0804b312 in sfile_find (trans=0xbf90e61c) at opd_sfile.c:124
#2  0x0804c215 in opd_process_samples (buffer=0xb7d4f008 "\002", count=32818) at opd_trans.c:120
#3  0x08049903 in opd_26_start () at init.c:131
#4  0x0804a2c3 in main (argc=Cannot access memory at address 0x0
) at oprofiled.c:496
(gdb) f 0
#0  do_match (sf=0x9e6afe8, cookie=4148450104, app_cookie=4148844408, ki=0x0, anon=0x0, tgid=26859, tid=26859, cpu=0) at opd_sfile.c:84
84      {
(gdb) info f
Stack level 0, frame at 0xbf90e5a0:
 eip = 0x804a870 in do_match (opd_sfile.c:84); saved eip 0x804b312
 called by frame at 0xbf90e600
 source language c.
 Arglist at 0xbf90e598, args: sf=0x9e6afe8, cookie=4148450104, app_cookie=4148844408, ki=0x0, anon=0x0, tgid=26859, tid=26859, cpu=0
 Locals at 0xbf90e598, Previous frame's sp is 0xbf90e5a0
 Saved registers:
  ebx at 0xbf90e58c, ebp at 0xbf90e598, esi at 0xbf90e590, edi at 0xbf90e594, eip at 0xbf90e59c
(gdb) info args
sf = (const struct sfile *) 0x9e6afe8
cookie = 4148450104
app_cookie = 4148844408
ki = (const struct kernel_image *) 0x0
anon = (const struct anon_mapping *) 0x0
tgid = 26859
tid = 26859
cpu = 0
(gdb) info local
No locals.
(gdb) f 1
#1  0x0804b312 in sfile_find (trans=0xbf90e61c) at opd_sfile.c:124
124             return do_match(sfile, trans->cookie, trans->app_cookie, ki,
(gdb) info f
Stack level 1, frame at 0xbf90e600:
 eip = 0x804b312 in sfile_find (opd_sfile.c:124); saved eip 0x804c215
 called by frame at 0xbf90e680, caller of frame at 0xbf90e5a0
 source language c.
 Arglist at 0xbf90e5f8, args: trans=0xbf90e61c
 Locals at 0xbf90e5f8, Previous frame's sp is 0xbf90e600
 Saved registers:
  ebx at 0xbf90e5ec, ebp at 0xbf90e5f8, esi at 0xbf90e5f0, edi at 0xbf90e5f4, eip at 0xbf90e5fc
(gdb) info args
trans = (const struct transient *) 0xbf90e61c
(gdb) p *trans
$1 = {buffer = 0xb7d6d53c "\004", remaining = 1765, tracing = TRACING_OFF, current = 0x0, last = 0x9f35770, anon = 0x0,
  last_anon = 0x0, cookie = 4148450104, app_cookie = 4148844408, pc = 443895, last_pc = 236920, event = 0, in_kernel = 0, cpu = 0,
  tid = 26859, tgid = 26859}
(gdb) info locals
sf = (struct sfile *) 0x9e6afe8
pos = (struct list_head *) 0x9e6b010
ki = (struct kernel_image *) 0x0
hash = 166
(gdb) p *sf
$2 = {hashval = 166, cookie = 4148450104, app_cookie = 18446744073709551615, tid = 23227, tgid = 23227, cpu = 0, kernel = 0x0,
  anon = 0x0, hash = {next = 0x9e6ad70, prev = 0x9e6c970}, lru = {next = 0x9e6af38, prev = 0x932ad90}, ignored = 1, files = {{
      data = 0x0}, {data = 0x0}, {data = 0x0}, {data = 0x0}, {data = 0x0}, {data = 0x0}, {data = 0x0}, {data = 0x0}}, cg_hash = {{
      next = 0x9e6b044, prev = 0x9e6b044}, {next = 0x9e6b04c, prev = 0x9e6b04c}, {next = 0x9e6b054, prev = 0x9e6b054}, {
      next = 0x9e6b05c, prev = 0x9e6b05c}, {next = 0x9e6b064, prev = 0x9e6b064}, {next = 0x9e6b06c, prev = 0x9e6b06c}, {
      next = 0x9e6b074, prev = 0x9e6b074}, {next = 0x9e6b07c, prev = 0x9e6b07c}, {next = 0x9e6b084, prev = 0x9e6b084}, {
      next = 0x9e6b08c, prev = 0x9e6b08c}, {next = 0x9e6b094, prev = 0x9e6b094}, {next = 0x9e6b09c, prev = 0x9e6b09c}, {
      next = 0x9e6b0a4, prev = 0x9e6b0a4}, {next = 0x9e6b0ac, prev = 0x9e6b0ac}, {next = 0x9e6b0b4, prev = 0x9e6b0b4}, {
      next = 0x9e6b0bc, prev = 0x9e6b0bc}}}
(gdb) p *pos
$3 = {next = 0x9e6ad70, prev = 0x9e6c970}
(gdb) f 2
#2  0x0804c215 in opd_process_samples (buffer=0xb7d4f008 "\002", count=32818) at opd_trans.c:120
120                     trans->current = sfile_find(trans);
(gdb) p f
No symbol "f" in current context.
(gdb) info f
Stack level 2, frame at 0xbf90e680:
 eip = 0x804c215 in opd_process_samples (opd_trans.c:120); saved eip 0x8049903
 called by frame at 0xbf90e6b0, caller of frame at 0xbf90e600
 source language c.
 Arglist at 0xbf90e678, args: buffer=0xb7d4f008 "\002", count=32818
 Locals at 0xbf90e678, Previous frame's sp is 0xbf90e680
 Saved registers:
  ebx at 0xbf90e66c, ebp at 0xbf90e678, esi at 0xbf90e670, edi at 0xbf90e674, eip at 0xbf90e67c
(gdb) info args
buffer = 0xb7d4f008 "\002"
count = 32818
(gdb) info locals
trans = {buffer = 0xb7d6d53c "\004", remaining = 1765, tracing = TRACING_OFF, current = 0x0, last = 0x9f35770, anon = 0x0,
  last_anon = 0x0, cookie = 4148450104, app_cookie = 4148844408, pc = 443895, last_pc = 236920, event = 0, in_kernel = 0, cpu = 0,
  tid = 26859, tgid = 26859}
code = 443895


On 4/20/07, William Cohen < wcohen@redhat.com> wrote:
Chris N wrote:
> I made a GUI for doing run-time profiling of an app but noticed the
> oprofiled starts at around 2-3% CPU usage and gradually goes to 90%
> within a few minutes.
> Am I doing something wrong with my config?   I think it may be some
> snowballing bug with the --reset.
> I just run oprofiled with the following rc:
>
> root@xerces:/usr/local/bin# cat /root/.oprofile/daemonrc
> CHOSEN_EVENTS_0=GLOBAL_POWER_EVENTS:100000:1:1:1
> NR_CHOSEN=1
> SEPARATE_LIB=0
> SEPARATE_KERNEL=0
> SEPARATE_THREAD=1
> SEPARATE_CPU=0
> VMLINUX=/usr/src/linux/vmlinux
> IMAGE_FILTER=/usr/local/bin/oprofiled
> BUF_SIZE=65536
> CPU_BUF_SIZE=0
> CALLGRAPH=0
> KERNEL_RANGE=c0100000,c02efb3f
> XENIMAGE=none
>
> I just run one simple watch command:
> watch --interval=2 "opcontrol --dump; opreport -d -n -l
> /usr/local/bin/oprofiled ; opcontrol --reset"
>
> CPU info:
> indy@xerces:/home/indy$ cat /proc/cpuinfo
> processor       : 0
> vendor_id       : GenuineIntel
> cpu family      : 15
> model           : 3
> model name      : Intel(R) Pentium(R) 4 CPU 3.00GHz
> stepping        : 4
> cpu MHz         : 3000.116
> cache size      : 1024 KB
> physical id     : 0
> siblings        : 1
> core id         : 0
> cpu cores       : 1
> fdiv_bug        : no
> hlt_bug         : no
> f00f_bug        : no
> coma_bug        : no
> fpu             : yes
> fpu_exception   : yes
> cpuid level     : 5
> wp              : yes
> flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
> mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
> constant_tsc up pni monitor ds_cpl cid xtpr
> bogomips        : 6005.36
> clflush size    : 64
>
> oprofile:
> oprofile-0.9.2
>
> LINUX version:
> indy@xerces:/home/indy$ uname -a
> Linux xerces 2.6.20.6 <http://2.6.20.6> #1 SMP PREEMPT Thu Apr 12
> 20:55:13 UTC 2007 i686 GNU/Linux
>
> I compiled it myself, have HT and SMP
>
> After about 3 minutes:
>   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
> 18221 root      25   0  7728 6532  420 R 91.6  0.7   0:39.73 oprofiled
>  5578 indy      15   0 32080  16m  12m S  1.0  1.9   0:32.51 konsole
>  5432 root      15   0  169m 111m 6956 S  0.3 12.6   8:05.21 Xorg
>  5575 indy      15   0 29964  14m  11m S   0.3  1.7   0:03.57 konsole
>  5583 indy      15   0 26060  11m 9632 S  0.3  1.3    0:00.78 klipper

Hi

It would be a lot easier to take a look at the results if the counts were
annotated to the source code of oprofiled. Could you use opannotate to map the
samples back to the source code for the hot functions? It looks like do_match
and sfile_find are the problem functions. Something like the following command
should give easier to understand output:

opannotate /usr/bin/oprofiled --source -i do_match -i sfile_find

Every 2 seconds the command causing oprofile to dump all the data, generate a
complete report of all the samples, and then erase all the samples. OProfile has
to look at a fair number of files to generate the output for opreport.
I think what is happening is that oprofile needs to figure out if debuginfo
files match up with executable. This is done by computing a checksum which is
expensive. Does this self built version of oprofile have the the debuginfo split
out, e.g. built as an RPM?

-Will