From: Subrata M. <su...@li...> - 2009-03-31 15:13:27
|
Hi Mathieu,

On Wed, 2009-03-18 at 11:29 +0530, Subrata Modak wrote:
> Hi Mathieu,
>
> On Tue, 2009-03-17 at 11:41 -0400, Mathieu Desnoyers wrote:
> > * Subrata Modak (tos...@gm...) wrote:
> > > Hi Mathieu,
> > >
> > > On Tue, Mar 17, 2009 at 7:02 AM, Mathieu Desnoyers <
> > > mat...@po...> wrote:
> > >
> > > > Hi,
> > > >
> > > > I am trying to get access to some non-x86 hardware to run some atomic
> > > > primitive benchmarks for a paper on LTTng I am preparing. That should be
> > > > useful to argue about performance benefit of per-cpu atomic operations
> > > > vs interrupt disabling. I would like to run the following benchmark
> > > > module on CONFIG_SMP :
> > > >
> > > > - PowerPC
> > > > - MIPS
> > > > - ia64
> > > > - alpha
> > > >
> > > > usage :
> > > > make
> > > > insmod test-cmpxchg-nolock.ko
> > > > insmod: error inserting 'test-cmpxchg-nolock.ko': -1 Resource temporarily unavailable
> > > > dmesg (see dmesg output)
> > > >
> > >
> > > With your permission, can we include this test in LTP (
> > > http://ltp.sourceforge.net/), in some appropriate place as a small benchmark
> > > test ?
> > >
> >
> > Hi Subrata,
> >
> > Sure, maybe you'll want to use a better interface than a module init
> > that fails though. :)
>
> Please Cc me when you come up with a better interface. Meanwhile, i will
> find out a better way to integrate this with LTP and will notify you
> when i do that. Thanks.

How about the following simple patch ? This will integrate it to LTP.
Nemeth, Comments ?

> >
> > Mathieu
> >
> > > Regards--
> > > Subrata
> > >
> > >
> > > > If some of you would be kind enough to run my test module provided below
> > > > and provide the results of these tests on a recent kernel (2.6.26~2.6.29
> > > > should be good) along with their cpuinfo, I would greatly appreciate.
> > > >
> > > > Here are the CAS results for various Intel-based architectures :
> > > >
> > > > Architecture        | Speedup                     | CAS          | Interrupts                   |
> > > >                     | (cli + sti) / local cmpxchg | local | sync | Enable (sti) | Disable (cli) |
> > > > -------------------------------------------------------------------------------------------------
> > > > Intel Pentium 4     | 5.24                        | 25    | 81   | 70           | 61            |
> > > > AMD Athlon(tm)64 X2 | 4.57                        | 7     | 17   | 17           | 15            |
> > > > Intel Core2         | 6.33                        | 6     | 30   | 20           | 18            |
> > > > Intel Xeon E5405    | 5.25                        | 8     | 24   | 20           | 22            |
> > > >
> > > > The benefit expected on PowerPC, ia64 and alpha should principally come
> > > > from removed memory barriers in the local primitives.
> > > >
> > > > Thanks,
> > > >
> > > > Mathieu
> > > >
> > > > P.S. please forgive the coding style and hackish interface. :)
> > > >
---
--- ltp-full-20090331.orig/testcases/kernel/device-drivers/misc_modules/per_cpu_atomic_operations_vs_interrupt_disabling_module/Makefile	1970-01-01 05:30:00.000000000 +0530
+++ ltp-full-20090331/testcases/kernel/device-drivers/misc_modules/per_cpu_atomic_operations_vs_interrupt_disabling_module/Makefile	2009-03-31 20:33:16.000000000 +0530
@@ -0,0 +1,23 @@
+# Out-of-tree kbuild makefile for the test-cmpxchg-nolock benchmark module.
+ifneq ($(KERNELRELEASE),)
+	obj-m += test-cmpxchg-nolock.o
+else
+KERNELDIR ?= /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+# kbuild records the release string of the configured tree in
+# include/config/kernel.release; it is only needed for depmod below.
+KERNELRELEASE = $(shell cat $(KERNELDIR)/$(KBUILD_OUTPUT)/include/config/kernel.release 2>/dev/null)
+ifneq ($(INSTALL_MOD_PATH),)
+	DEPMOD_OPT := -b $(INSTALL_MOD_PATH)
+endif
+
+default:
+	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules
+
+modules_install:
+	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules_install
+	if [ -f $(KERNELDIR)/$(KBUILD_OUTPUT)/System.map ] ; then /sbin/depmod -ae -F $(KERNELDIR)/$(KBUILD_OUTPUT)/System.map $(DEPMOD_OPT) $(KERNELRELEASE) ; fi
+
+clean:
+	$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
+endif
---
ltp-full-20090331.orig/testcases/kernel/device-drivers/misc_modules/per_cpu_atomic_operations_vs_interrupt_disabling_module/test-cmpxchg-nolock.c	1970-01-01 05:30:00.000000000 +0530
+++ ltp-full-20090331/testcases/kernel/device-drivers/misc_modules/per_cpu_atomic_operations_vs_interrupt_disabling_module/test-cmpxchg-nolock.c	2009-03-31 20:34:04.000000000 +0530
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) Mathieu Desnoyers <mat...@po...>, 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * test-cmpxchg-nolock.c
+ *
+ * Compare local cmpxchg with irq disable / enable.
+ *
+ * usage :
+ *	make
+ *	insmod test-cmpxchg-nolock.ko
+ *	insmod: error inserting 'test-cmpxchg-nolock.ko':
+ *		-1 Resource temporarily unavailable
+ *	dmesg (see dmesg output)
+ *
+ * The init function runs every benchmark and then returns -EAGAIN on
+ * purpose, so the insmod error above is expected: the module is never
+ * left loaded and the results are read from the kernel log.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/jiffies.h>
+#include <linux/compiler.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/math64.h>
+#include <linux/preempt.h>
+#include <linux/irqflags.h>
+#include <asm/atomic.h>
+#include <asm/local.h>
+#include <asm/timex.h>
+#include <asm/system.h>
+
+#define NR_LOOPS 20000
+
+/* Target word of the cmpxchg benchmarks; every cmpxchg swaps 0 for 0. */
+static int test_val;
+
+/*
+ * Print the outcome of one benchmark to the kernel log.
+ *
+ * @title: name used in the "test results" header line
+ * @label: name used in the per-iteration summary line
+ * @total: cycles spent in the whole NR_LOOPS loop
+ *
+ * cycles_t is defined per architecture and need not be unsigned long long,
+ * so both values are widened explicitly before being passed to %llu.
+ */
+static void print_result(const char *title, const char *label, cycles_t total)
+{
+	u32 rem;
+	u64 per_loop = div_u64_rem(total, NR_LOOPS, &rem);
+
+	printk(KERN_ALERT "test results: time for %s\n", title);
+	printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
+	printk(KERN_ALERT "total time: %llu\n", (unsigned long long)total);
+	printk(KERN_ALERT "-> %s takes %llu cycles\n", label,
+	       (unsigned long long)per_loop);
+	printk(KERN_ALERT "test end\n");
+}
+
+/* Time the empty loop, so the loop overhead itself is visible. */
+static void do_testbaseline(void)
+{
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		asm volatile ("");
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+
+	print_result("baseline", "baseline", time2 - time1);
+}
+
+/* Time cmpxchg() (sync_cmpxchg() when CONFIG_X86_32 is set) on test_val. */
+static void do_test_sync_cmpxchg(void)
+{
+	int ret;
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+#ifdef CONFIG_X86_32
+		ret = sync_cmpxchg(&test_val, 0, 0);
+#else
+		ret = cmpxchg(&test_val, 0, 0);
+#endif
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+
+	print_result("locked cmpxchg", "locked cmpxchg", time2 - time1);
+}
+
+/* Time cmpxchg_local() on test_val. */
+static void do_test_cmpxchg(void)
+{
+	int ret;
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		ret = cmpxchg_local(&test_val, 0, 0);
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+
+	print_result("non locked cmpxchg", "non locked cmpxchg", time2 - time1);
+}
+
+/* Time atomic_add_return() on an on-stack atomic_t. */
+static void do_test_sync_inc(void)
+{
+	int ret;
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+	atomic_t val = ATOMIC_INIT(0);	/* start from a defined value */
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		ret = atomic_add_return(10, &val);
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+
+	print_result("locked add return", "locked add return", time2 - time1);
+}
+
+/* Time local_add_return() on an on-stack local_t. */
+static void do_test_inc(void)
+{
+	int ret;
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+	local_t loc_val = LOCAL_INIT(0);	/* start from a defined value */
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		ret = local_add_return(10, &loc_val);
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+
+	print_result("non locked add return", "non locked add return",
+		     time2 - time1);
+}
+
+/*
+ * This test will have a higher standard deviation due to incoming interrupts.
+ */
+static void do_test_enable_int(void)
+{
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		local_irq_restore(flags);
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+
+	print_result("enabling interrupts (STI)", "enabling interrupts (STI)",
+		     time2 - time1);
+}
+
+/* Time repeated local_irq_save() issued after interrupts were saved off. */
+static void do_test_disable_int(void)
+{
+	unsigned long flags, flags2;
+	unsigned int i;
+	cycles_t time1, time2;
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		local_irq_save(flags2);
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+
+	print_result("disabling interrupts (CLI)", "disabling interrupts (CLI)",
+		     time2 - time1);
+}
+
+/* Time one local_irq_restore() + local_irq_save() pair per iteration. */
+static void do_test_int(void)
+{
+	unsigned long flags;
+	unsigned int i;
+	cycles_t time1, time2;
+
+	local_irq_save(flags);
+	preempt_disable();
+	time1 = get_cycles();
+	for (i = 0; i < NR_LOOPS; i++) {
+		local_irq_restore(flags);
+		local_irq_save(flags);
+	}
+	time2 = get_cycles();
+	local_irq_restore(flags);
+	preempt_enable();
+
+	/* The header and summary lines use different word order here. */
+	print_result("disabling/enabling interrupts (STI/CLI)",
+		     "enabling/disabling interrupts (STI/CLI)",
+		     time2 - time1);
+}
+
+/* Run every benchmark once, then fail so the module is unloaded at once. */
+static int ltt_test_init(void)
+{
+	printk(KERN_ALERT "test init\n");
+
+	do_testbaseline();
+	do_test_sync_cmpxchg();
+	do_test_cmpxchg();
+	do_test_sync_inc();
+	do_test_inc();
+	do_test_enable_int();
+	do_test_disable_int();
+	do_test_int();
+	return -EAGAIN; /* Fail will directly unload the module */
+}
+
+/* Log module removal. */
+static void ltt_test_exit(void)
+{
+	printk(KERN_ALERT "test exit\n");
+}
+
+module_init(ltt_test_init);
+module_exit(ltt_test_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Cmpxchg vs int Test");
---
Regards--
Subrata
>
> Regards--
> Subrata
>
|