From: John L. <mov...@us...> - 2001-09-21 08:20:05
|
Update of /cvsroot/oprofile/oprofile In directory usw-pr-cvs1:/tmp/cvs-serv20927 Modified Files: Tag: BRANCH_NO_THREAD ChangeLog Makefile.in configure.in oprofile.c oprofile.h Log Message: proper fix for no-thread, updates from mainline Index: ChangeLog =================================================================== RCS file: /cvsroot/oprofile/oprofile/ChangeLog,v retrieving revision 1.125 retrieving revision 1.125.2.1 diff -u -d -r1.125 -r1.125.2.1 --- ChangeLog 2001/09/19 20:08:21 1.125 +++ ChangeLog 2001/09/21 08:20:02 1.125.2.1 @@ -1,3 +1,20 @@ +2001-09-20 Philippe Elie <ph...@cl...> + + * Makefile.in: minor change in module building + * pp/Makefile.in: link oprofpp with opf_filter + * pp/op_to_source: avoid pipe from oprofpp to opf_filter + * pp/opf_filter.h: remove a few member function + * pp/opf_container.cpp: ditto + minor cleanup + * pp/opf_filter.cpp: oprofpp is no longer piped to opf_filter + but linked with it. Many change, needs additional cleanup + * pp/oprofpp.c: group all stuff in two class. Rather a + a C with class than a full C++ file for now. Fix a bug + in filename handling. + * pp/oprofpp.h: add opp_bfd and opp_samples_files class. + + * dae/opd_util.c: add a few explicit cast for C++ compile + * dae/opd_util.h: wrap function declaratio with extern "C" + 2001-09-19 John Levon <mo...@co...> * oprofile.c: fix silly read valid bug @@ -51,10 +68,10 @@ * acinclude.m4: * autogen.sh: - * configure.in: + * configure.in: * gui/Makefile.in: * gui/ui/Makefile.in: - * gui/ui/oprof_start.base.ui: + * gui/ui/oprof_start.base.ui: * gui/oprof_start.h: autoconfiscation for Qt2 * gui/oprof_start_config.cpp: add comments @@ -546,14 +563,14 @@ 2001-07-21 Philippe Elie <ph...@cl...> - * doc/oprofile.sgml: - * gui/oprofile: - * oprofile.c: remove edge_detect support. + * doc/oprofile.sgml: + * gui/oprofile: + * oprofile.c: remove edge_detect support. - * pp/oprofpp.c: check counter range for each event type. - * oprofile.c: ditto. 
- * gui/oprofile: ditto and save setup for each event type. - * oprofile.h: op_check_range() : display the allowed range. + * pp/oprofpp.c: check counter range for each event type. + * oprofile.c: ditto. + * gui/oprofile: ditto and save setup for each event type. + * oprofile.h: op_check_range() : display the allowed range. 2001-07-18 Philippe Elie <ph...@cl...> Index: Makefile.in =================================================================== RCS file: /cvsroot/oprofile/oprofile/Makefile.in,v retrieving revision 1.28 retrieving revision 1.28.2.1 diff -u -d -r1.28 -r1.28.2.1 --- Makefile.in 2001/09/19 20:14:14 1.28 +++ Makefile.in 2001/09/21 08:20:02 1.28.2.1 @@ -74,8 +74,8 @@ KCFLAGS := $(BKCFLAGS) -march=i686 ASMFLAGS := -D__ASSEMBLY__ -DMODULE -D__KERNEL__ -traditional -oprofile.o: op_init.o op_util.o oprofile_c.o oprofile_nmi.o op_syscalls.o op_events.o op_x86.o - ld -r -o $@ op_init.o op_util.o oprofile_c.o oprofile_nmi.o op_syscalls.o op_events.o op_x86.o +oprofile.o: op_init.o op_util.o oprofile_c.o oprofile_nmi.o op_syscalls.o op_events_module.o op_x86.o + ld -r -o $@ $^ oprofile.h: op_user.h version.h @@ -97,8 +97,8 @@ oprofile.s: oprofile.c oprofile.h $(CC) $(KCFLAGS) -S $< -op_events.o: op_events.c op_user.h - $(CC) $(KCFLAGS) -c -o $@ $< +op_events_module.o: op_events.c op_user.h + $(CC) $(KCFLAGS) -c -o op_events_module.o $< oprofile_nmi.o: oprofile_nmi.S $(CC) $(ASMFLAGS) -c -o $@ $< Index: configure.in =================================================================== RCS file: /cvsroot/oprofile/oprofile/configure.in,v retrieving revision 1.32 retrieving revision 1.32.2.1 diff -u -d -r1.32 -r1.32.2.1 --- configure.in 2001/09/18 01:00:33 1.32 +++ configure.in 2001/09/21 08:20:02 1.32.2.1 @@ -91,6 +91,19 @@ void monkey() __attribute__((malloc)); ],AC_MSG_RESULT("yes"); CFLAGS="$SAVE_CFLAGS -DMALLOC_OK", AC_MSG_RESULT("no"); CFLAGS="$SAVE_CFLAGS") +# FIXME !!!!!!!! 
+# AC_MSG_CHECKING("whether __builtin_expect is understood") +# SAVE_CFLAGS=$CFLAGS +# CFLAGS="-Werror $CFLAGS" +# AC_TRY_COMPILE(,[ +# int main() { +# int i; +# if (__builtin_expect(i, 0)) { +# } +# } +# ],AC_MSG_RESULT("yes"); BKCFLAGS="$BKCFLAGS -DEXPECT_OK", AC_MSG_RESULT("no");) +# CFLAGS=$SAVE_CFLAGS + AC_MSG_CHECKING(for x86 architecture) AX_COMPILE_OPTION(CONFIG_X86,x86=1,x86=0) Index: oprofile.c =================================================================== RCS file: /cvsroot/oprofile/oprofile/oprofile.c,v retrieving revision 1.91.2.1 retrieving revision 1.91.2.2 diff -u -d -r1.91.2.1 -r1.91.2.2 --- oprofile.c 2001/09/20 04:56:01 1.91.2.1 +++ oprofile.c 2001/09/21 08:20:02 1.91.2.2 @@ -71,36 +71,45 @@ /* ---------------- NMI handler ------------------ */ -/* FIXME: this whole handler would probably be better in straight asm */ -static void evict_op_entry(struct _oprof_data *data, struct op_sample *ops, struct pt_regs *regs) +inline static int need_wakeup(uint cpu, struct _oprof_data * data) { - uint cpu = op_cpu_id(); + return data->nextbuf >= (data->buf_size - OP_PRE_WATERMARK) && !oprof_ready[cpu]; +} +inline static void next_sample(struct _oprof_data * data) +{ + if (unlikely(++data->nextbuf == data->buf_size)) + data->nextbuf = 0; +} + +inline static void evict_op_entry(uint cpu, struct _oprof_data * data, const struct op_sample *ops, const struct pt_regs *regs) +{ memcpy(&data->buffer[data->nextbuf], ops, sizeof(struct op_sample)); - if (++data->nextbuf < (data->buf_size - OP_PRE_WATERMARK) || oprof_ready[cpu]) { - if (data->nextbuf == data->buf_size) - data->nextbuf = 0; + next_sample(data); + if (likely(!need_wakeup(cpu, data))) return; - } - oprof_ready[cpu] = 1; /* rationale : * * other CPUs are not a race concern since we synch on oprof_wait->lock. * * for the current CPU, we might have interrupted another user of e.g. - * runqueue_lock, deadlocking on SMP and racing on UP. 
So we have to make - * sure the interrupted code on this CPU was not running in kernel space. + * runqueue_lock, deadlocking on SMP and racing on UP. So we check that IRQs + * were not disabled (corresponding to the irqsave/restores in __wake_up() * * This will mean that approaching the end of the buffer, a number of the * evictions may fail to wake up the daemon. We simply hope this doesn't * take long; a pathological case could cause buffer overflow (which will * be less of an issue when we have a separate map device anyway). * - * FIXME: this is a real problem when we disable user-space sampling. + * Note that we use oprof_ready as our flag for whether we have initiated a + * wake-up. Once the wake-up is received, the flag is reset as well as + * data->nextbuf, preventing multiple wakeups. */ - if (user_mode(regs)) + if (likely(regs->eflags & IF_MASK)) { + oprof_ready[cpu] = 1; wake_up(&oprof_wait); + } } inline static void fill_op_entry(struct op_sample *ops, struct pt_regs *regs, int ctr) @@ -110,58 +119,58 @@ ops->count = (1U << OP_BITS_COUNT)*ctr + 1; } -inline static void op_do_profile(struct _oprof_data *data, struct pt_regs *regs, int ctr) +inline static void op_do_profile(uint cpu, struct pt_regs *regs, int ctr) { + struct _oprof_data * data = &oprof_data[cpu]; uint h = op_hash(regs->eip, current->pid, ctr); uint i; for (i=0; i < OP_NR_ENTRY; i++) { - if (!op_miss(data->entries[h].samples[i])) { + if (likely(!op_miss(data->entries[h].samples[i]))) { data->entries[h].samples[i].count++; set_perfctr(data->ctr_count[ctr], ctr); return; - } else if (op_full_count(data->entries[h].samples[i].count)) { + } else if (unlikely(op_full_count(data->entries[h].samples[i].count))) { goto full_entry; - } else if (!data->entries[h].samples[i].count) + } else if (unlikely(!data->entries[h].samples[i].count)) goto new_entry; } - evict_op_entry(data, &data->entries[h].samples[data->next], regs); + evict_op_entry(cpu, data, &data->entries[h].samples[data->next], regs); 
fill_op_entry(&data->entries[h].samples[data->next], regs, ctr); data->next = (data->next + 1) % OP_NR_ENTRY; out: set_perfctr(data->ctr_count[ctr], ctr); return; full_entry: - evict_op_entry(data, &data->entries[h].samples[i], regs); + evict_op_entry(cpu, data, &data->entries[h].samples[i], regs); new_entry: fill_op_entry(&data->entries[h].samples[i],regs,ctr); goto out; } -static void op_check_ctr(struct _oprof_data *data, struct pt_regs *regs, int ctr) +static void op_check_ctr(uint cpu, struct pt_regs *regs, int ctr) { ulong l,h; get_perfctr(l, h, ctr); - if (ctr_overflowed(l)) { - op_do_profile(data, regs, ctr); - op_irq_stats[op_cpu_id()]++; + if (likely(ctr_overflowed(l))) { + op_do_profile(cpu, regs, ctr); + op_irq_stats[cpu]++; } } asmlinkage void op_do_nmi(struct pt_regs *regs) { - struct _oprof_data *data = &oprof_data[op_cpu_id()]; + uint cpu = op_cpu_id(); int i; - if (pid_filter && current->pid != pid_filter) + if (unlikely(pid_filter) && likely(current->pid != pid_filter)) return; - if (pgrp_filter && current->pgrp != pgrp_filter) + if (unlikely(pgrp_filter) && likely(current->pgrp != pgrp_filter)) return; - for (i = 0 ; i < op_nr_counters ; ++i) { - op_check_ctr(data, regs, i); - } + for (i = 0 ; i < op_nr_counters ; ++i) + op_check_ctr(cpu, regs, i); } /* ---------------- PMC setup ------------------ */ @@ -328,6 +337,7 @@ /* ---------------- driver routines ------------------ */ +#if 0 static u32 diethreaddie; static pid_t threadpid; @@ -383,17 +393,19 @@ kill_proc(SIGKILL, threadpid, 1); wait_for_completion(&threadstop); } - -#define wrap_nextbuf() do { \ - if (++data->nextbuf == (data->buf_size - OP_PRE_WATERMARK)) { \ - oprof_ready[0] = 1; \ - wake_up(&oprof_wait); \ - } else if (data->nextbuf == data->buf_size) \ - data->nextbuf = 0; \ - } while (0) +#endif spinlock_t note_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; +inline static void oprof_wrap_buf(struct _oprof_data * data) +{ + next_sample(data); + if (likely(!need_wakeup(0, data))) + 
return; + oprof_ready[0] = 1; + wake_up(&oprof_wait); +} + void oprof_put_mapping(struct op_mapping *map) { struct _oprof_data *data = &oprof_data[0]; @@ -410,11 +422,11 @@ data->buffer[data->nextbuf].count = ((map->is_execve) ? OP_EXEC : OP_MAP) | map->hash; - wrap_nextbuf(); + oprof_wrap_buf(data); data->buffer[data->nextbuf].eip = map->len; data->buffer[data->nextbuf].pid = map->offset & 0xffff; data->buffer[data->nextbuf].count = map->offset >> 16; - wrap_nextbuf(); + oprof_wrap_buf(data); pmc_select_start(0); spin_unlock(&note_lock); @@ -432,7 +444,7 @@ pmc_select_stop(0); memcpy(&data->buffer[data->nextbuf], samp, sizeof(struct op_sample)); - wrap_nextbuf(); + oprof_wrap_buf(data); pmc_select_start(0); spin_unlock(&note_lock); @@ -848,11 +860,9 @@ ops->count = 0; - if (++data->nextbuf != (data->buf_size - OP_PRE_WATERMARK)) { - if (data->nextbuf == data->buf_size) - data->nextbuf=0; + next_sample(data); + if (likely(!need_wakeup(cpu, data))) return; - } oprof_ready[cpu] = 1; } @@ -863,7 +873,6 @@ int i,j; down(&sysctlsem); - if (!prof_on) goto out; @@ -878,6 +887,7 @@ /* clean out the hash table as far as possible */ for (cpu=0; cpu < smp_num_cpus; cpu++) { struct _oprof_data * data = &oprof_data[cpu]; + spin_lock(&note_lock); pmc_select_stop(cpu); for (i=0; i < data->hash_size; i++) { for (j=0; j < OP_NR_ENTRY; j++) @@ -885,6 +895,7 @@ if (oprof_ready[cpu]) break; } + spin_unlock(&note_lock); oprof_ready[cpu] = 2; pmc_select_start(cpu); } Index: oprofile.h =================================================================== RCS file: /cvsroot/oprofile/oprofile/oprofile.h,v retrieving revision 1.54 retrieving revision 1.54.2.1 diff -u -d -r1.54 -r1.54.2.1 --- oprofile.h 2001/09/18 10:00:07 1.54 +++ oprofile.h 2001/09/21 08:20:02 1.54.2.1 @@ -174,6 +174,22 @@ #define op_cpu_id() (cpu_number_map(smp_processor_id())) +/* branch prediction */ +#ifdef EXPECT_OK +#ifndef likely +#define likely(a) __builtin_expect((a), 1) +#endif +#ifndef unlikely +#define unlikely(a)
__builtin_expect((a), 0) +#endif +#else +#ifdef likely +#error likely defined - kernel compiler and compiler specified do not match ! +#endif +#define likely(a) (a) +#define unlikely(a) (a) +#endif + /* we can't unload safely on SMP */ #ifdef CONFIG_SMP #define smp_can_unload() (allow_unload) |