From: John L. <le...@mo...> - 2003-02-19 18:57:53
|
An updated version of Will's patch, against 2.5.62. Will, I didn't understand your old check available/filled code. Does the below look fruity to you ? What is the rmb() protecting ? regards john diff -Naur -X dontdiff linux-linus/drivers/oprofile/buffer_sync.c linux/drivers/oprofile/buffer_sync.c --- linux-linus/drivers/oprofile/buffer_sync.c 2003-02-15 18:10:45.000000000 +0000 +++ linux/drivers/oprofile/buffer_sync.c 2003-02-19 18:41:51.000000000 +0000 @@ -310,6 +310,33 @@ } +/* compute number of filled slots in cpu_buffer queue */ +static unsigned long nr_filled_slots(struct oprofile_cpu_buffer * b) +{ + unsigned long head = b->head_pos; + unsigned long tail = b->tail_pos; + + if (head >= tail) + return head - tail; + + return head + (b->buffer_size - tail); +} + + +static void increment_tail(struct oprofile_cpu_buffer * b) +{ + unsigned long new_tail = b->tail_pos + 1; + + /* FIXME - why the rmb() */ + rmb(); + + if (new_tail < (b->buffer_size)) + b->tail_pos = new_tail; + else + b->tail_pos = 0; +} + + /* Sync one of the CPU's buffers into the global event buffer. * Here we need to go through each batch of samples punctuated * by context switch notes, taking the task's mmap_sem and doing @@ -324,8 +351,12 @@ int in_kernel = 1; int i; - for (i=0; i < cpu_buf->pos; ++i) { - struct op_sample * s = &cpu_buf->buffer[i]; + /* Remember, only we can modify tail_pos */ + + unsigned long const available_elements = nr_filled_slots(cpu_buf); + + for (i=0; i < available_elements; ++i) { + struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; if (is_ctx_switch(s->eip)) { if (s->event <= 1) { @@ -345,6 +376,8 @@ } else { add_sample(mm, s, in_kernel); } + + increment_tail(cpu_buf); } release_mm(mm); @@ -369,17 +402,10 @@ cpu_buf = &cpu_buffer[i]; - /* We take a spin lock even though we might - * sleep. It's OK because other users are try - * lockers only, and this region is already - * protected by buffer_sem. It's raw to prevent - * the preempt bogometer firing. Fruity, huh ? */ - if (cpu_buf->pos > 0) { - _raw_spin_lock(&cpu_buf->int_lock); + /* FIXME ... if (cpu_buf->pos > 0) */ add_cpu_switch(i); sync_buffer(cpu_buf); - _raw_spin_unlock(&cpu_buf->int_lock); - } + /* } */ } up(&buffer_sem); diff -Naur -X dontdiff linux-linus/drivers/oprofile/cpu_buffer.c linux/drivers/oprofile/cpu_buffer.c --- linux-linus/drivers/oprofile/cpu_buffer.c 2003-02-15 18:10:45.000000000 +0000 +++ linux/drivers/oprofile/cpu_buffer.c 2003-02-19 18:40:57.000000000 +0000 @@ -26,8 +26,6 @@ struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned; -static unsigned long buffer_size; - static void __free_cpu_buffers(int num) { int i; @@ -47,7 +45,7 @@ { int i; - buffer_size = fs_cpu_buffer_size; + unsigned long buffer_size = fs_cpu_buffer_size; for (i=0; i < NR_CPUS; ++i) { struct oprofile_cpu_buffer * b = &cpu_buffer[i]; @@ -59,12 +57,12 @@ if (!b->buffer) goto fail; - spin_lock_init(&b->int_lock); - b->pos = 0; b->last_task = 0; b->last_is_kernel = -1; + b->buffer_size = buffer_size; + b->tail_pos = 0; + b->head_pos = 0; b->sample_received = 0; - b->sample_lost_locked = 0; b->sample_lost_overflow = 0; b->sample_lost_task_exit = 0; } @@ -80,11 +78,38 @@ __free_cpu_buffers(NR_CPUS); } - -/* Note we can't use a semaphore here as this is supposed to - * be safe from any context. Instead we trylock the CPU's int_lock. - * int_lock is taken by the processing code in sync_cpu_buffers() - * so we avoid disturbing that. + +/* compute number of available slots in cpu_buffer queue */ +static unsigned long nr_available_slots(struct oprofile_cpu_buffer const * b) +{ + unsigned long head = b->head_pos; + unsigned long tail = b->tail_pos; + + if (tail >= head) + return tail - head; + + return tail + (b->buffer_size - head); +} + + +static void increment_head(struct oprofile_cpu_buffer * b) +{ + unsigned long new_head = b->head_pos + 1; + + /* Ensure anything written to the slot before we + * increment is visible */ + wmb(); + + if (new_head < (b->buffer_size)) + b->head_pos = new_head; + else + b->head_pos = 0; +} + + +/* This must be safe from any context. It's safe writing here + * because of the head/tail separation of the writer and reader + * of the CPU buffer. * * is_kernel is needed because on some architectures you cannot * tell if you are in kernel or user space simply by looking at @@ -101,14 +126,10 @@ cpu_buf->sample_received++; - if (!spin_trylock(&cpu_buf->int_lock)) { - cpu_buf->sample_lost_locked++; - return; - } - if (cpu_buf->pos > buffer_size - 2) { + if (nr_available_slots(cpu_buf) > cpu_buf->buffer_size - 3) { cpu_buf->sample_lost_overflow++; - goto out; + return; } task = current; @@ -116,18 +137,18 @@ /* notice a switch from user->kernel or vice versa */ if (cpu_buf->last_is_kernel != is_kernel) { cpu_buf->last_is_kernel = is_kernel; - cpu_buf->buffer[cpu_buf->pos].eip = ~0UL; - cpu_buf->buffer[cpu_buf->pos].event = is_kernel; - cpu_buf->pos++; + cpu_buf->buffer[cpu_buf->head_pos].eip = ~0UL; + cpu_buf->buffer[cpu_buf->head_pos].event = is_kernel; + increment_head(cpu_buf); } /* notice a task switch */ if (cpu_buf->last_task != task) { cpu_buf->last_task = task; if (!(task->flags & PF_EXITING)) { - cpu_buf->buffer[cpu_buf->pos].eip = ~0UL; - cpu_buf->buffer[cpu_buf->pos].event = (unsigned long)task; - cpu_buf->pos++; + cpu_buf->buffer[cpu_buf->head_pos].eip = ~0UL; + cpu_buf->buffer[cpu_buf->head_pos].event = (unsigned long)task; + increment_head(cpu_buf); } } @@ -138,22 +159,21 @@ */ if (task->flags & PF_EXITING) { cpu_buf->sample_lost_task_exit++; - goto out; + return; } - cpu_buf->buffer[cpu_buf->pos].eip = eip; - cpu_buf->buffer[cpu_buf->pos].event = event; - cpu_buf->pos++; -out: - spin_unlock(&cpu_buf->int_lock); + cpu_buf->buffer[cpu_buf->head_pos].eip = eip; + cpu_buf->buffer[cpu_buf->head_pos].event = event; + increment_head(cpu_buf); } + /* resets the cpu buffer to a sane state - should be called with * cpu_buf->int_lock held */ void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf) { - cpu_buf->pos = 0; + /* FIXME, just remove ? cpu_buf->pos = 0; */ /* reset these to invalid values; the next sample * collected will populate the buffer with proper diff -Naur -X dontdiff linux-linus/drivers/oprofile/cpu_buffer.h linux/drivers/oprofile/cpu_buffer.h --- linux-linus/drivers/oprofile/cpu_buffer.h 2003-02-15 18:10:45.000000000 +0000 +++ linux/drivers/oprofile/cpu_buffer.h 2003-02-19 18:18:29.000000000 +0000 @@ -30,14 +30,13 @@ }; struct oprofile_cpu_buffer { - spinlock_t int_lock; - /* protected by int_lock */ - unsigned long pos; + unsigned long head_pos; + unsigned long tail_pos; + unsigned long buffer_size; struct task_struct * last_task; int last_is_kernel; struct op_sample * buffer; unsigned long sample_received; - unsigned long sample_lost_locked; unsigned long sample_lost_overflow; unsigned long sample_lost_task_exit; } ____cacheline_aligned; diff -Naur -X dontdiff linux-linus/drivers/oprofile/oprofile_stats.c linux/drivers/oprofile/oprofile_stats.c --- linux-linus/drivers/oprofile/oprofile_stats.c 2002-12-16 03:54:21.000000000 +0000 +++ linux/drivers/oprofile/oprofile_stats.c 2003-02-19 18:18:54.000000000 +0000 @@ -26,7 +26,6 @@ cpu_buf = &cpu_buffer[i]; cpu_buf->sample_received = 0; - cpu_buf->sample_lost_locked = 0; cpu_buf->sample_lost_overflow = 0; cpu_buf->sample_lost_task_exit = 0; } @@ -62,8 +61,6 @@ */ oprofilefs_create_ro_ulong(sb, cpudir, "sample_received", &cpu_buf->sample_received); - oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_locked", - &cpu_buf->sample_lost_locked); oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_overflow", &cpu_buf->sample_lost_overflow); oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_task_exit", |