From: Perry E. M. <pe...@pi...> - 2004-04-05 00:34:04
|
These patches do three related things. 1) Convert bzeros to memsets. Why? Because memset is the standard call, and because modern gcc inlines it, and does really well making it efficient in most cases. 2) Purge the code that does the unmap/map "trick" for zeroing large blocks. I've checked several OSes and talked to a bunch of OS gurus and everyone agrees it is at best slightly worse than inlined memset and at worst a serious bad idea. 3) Get rid of the assembly language bzero -- gcc will do better on its own. Index: src/runtime/gencgc.c =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/runtime/gencgc.c,v retrieving revision 1.51 diff -u -r1.51 gencgc.c --- src/runtime/gencgc.c 30 Mar 2004 12:24:53 -0000 1.51 +++ src/runtime/gencgc.c 4 Apr 2004 23:23:40 -0000 @@ -68,20 +68,6 @@ * that don't have pointers to younger generations? */ boolean enable_page_protection = 1; -/* Should we unmap a page and re-mmap it to have it zero filled? */ -#if defined(__FreeBSD__) || defined(__OpenBSD__) -/* comment from cmucl-2.4.8: This can waste a lot of swap on FreeBSD - * so don't unmap there. - * - * The CMU CL comment didn't specify a version, but was probably an - * old version of FreeBSD (pre-4.0), so this might no longer be true. - * OTOH, if it is true, this behavior might exist on OpenBSD too, so - * for now we don't unmap there either. -- WHN 2001-04-07 */ -boolean gencgc_unmap_zero = 0; -#else -boolean gencgc_unmap_zero = 1; -#endif - /* the minimum size (in bytes) for a large object*/ unsigned large_object_size = 4 * PAGE_BYTES; @@ -153,7 +143,7 @@ /* Calculate the start address for the given page number. */ -inline void * +static inline void * page_address(int page_num) { return (heap_base + (page_num * PAGE_BYTES)); @@ -2968,7 +2979,6 @@ * assumes that all objects have been copied or promoted to an older * generation. Bytes_allocated and the generation bytes_allocated * counter are updated. The number of bytes freed is returned. */ -extern void i586_bzero(void *addr, int nbytes); static int free_oldspace(void) { @@ -3016,37 +3026,8 @@ && (page_table[last_page].bytes_used != 0) && (page_table[last_page].gen == from_space)); - /* Zero pages from first_page to (last_page-1). - * - * FIXME: Why not use os_zero(..) function instead of - * hand-coding this again? (Check other gencgc_unmap_zero - * stuff too. */ - if (gencgc_unmap_zero) { - void *page_start, *addr; - - page_start = (void *)page_address(first_page); - - os_invalidate(page_start, PAGE_BYTES*(last_page-first_page)); - addr = os_validate(page_start, PAGE_BYTES*(last_page-first_page)); - if (addr == NULL || addr != page_start) { - /* Is this an error condition? I couldn't really tell from - * the old CMU CL code, which fprintf'ed a message with - * an exclamation point at the end. But I've never seen the - * message, so it must at least be unusual.. - * - * (The same condition is also tested for in gc_free_heap.) - * - * -- WHN 19991129 */ - lose("i586_bzero: page moved, 0x%08x ==> 0x%08x", - page_start, - addr); - } - } else { - int *page_start; - - page_start = (int *)page_address(first_page); - i586_bzero(page_start, PAGE_BYTES*(last_page-first_page)); - } + /* Zero pages from first_page to (last_page-1). */ + memset(page_address(first_page), 0, PAGE_BYTES*(last_page-first_page)); first_page = last_page; Index: src/runtime/os-common.c =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/runtime/os-common.c,v retrieving revision 1.6 diff -u -r1.6 os-common.c --- src/runtime/os-common.c 20 Feb 2004 18:15:20 -0000 1.6 +++ src/runtime/os-common.c 4 Apr 2004 23:23:40 -0000 @@ -11,7 +11,7 @@ #include <stdio.h> #include <errno.h> -#include <strings.h> +#include <string.h> #include "sbcl.h" #include "os.h" @@ -22,38 +22,11 @@ * instead. See hpux-os.c for some useful restrictions on actual * usage. */ +/* FIXME: this should be turned into a pure inline memset where it is used. */ void os_zero(os_vm_address_t addr, os_vm_size_t length) { - os_vm_address_t block_start; - os_vm_size_t block_size; - -#ifdef DEBUG - fprintf(stderr,";;; os_zero: addr: 0x%08x, len: 0x%08x\n",addr,length); -#endif - - block_start = os_round_up_to_page(addr); - - length -= block_start-addr; - block_size = os_trunc_size_to_page(length); - - if (block_start > addr) - bzero((char *)addr, block_start-addr); - if (block_size < length) - bzero((char *)block_start+block_size, length-block_size); - - if (block_size != 0) { - /* Now deallocate and allocate the block so that it faults in - * zero-filled. */ - - os_invalidate(block_start, block_size); - addr = os_validate(block_start, block_size); - - if (addr == NULL || addr != block_start) - lose("os_zero: block moved! 0x%08x ==> 0x%08x", - block_start, - addr); - } + memset(addr, 0, length); } os_vm_address_t Index: src/runtime/vars.c =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/runtime/vars.c,v retrieving revision 1.4 diff -u -r1.4 vars.c --- src/runtime/vars.c 20 Feb 2004 18:15:20 -0000 1.4 +++ src/runtime/vars.c 4 Apr 2004 23:23:43 -0000 @@ -10,7 +10,7 @@ */ #include <stdio.h> -#include <strings.h> +#include <string.h> #include <sys/types.h> #include <stdlib.h> @@ -74,8 +74,8 @@ free(var); } } - bzero(NameHash, sizeof(NameHash)); - bzero(ObjHash, sizeof(ObjHash)); + memset(NameHash, 0, sizeof(NameHash)); + memset(ObjHash, 0, sizeof(ObjHash)); tempcntr = 1; for (var = perm; var != NULL; var = next) { Index: src/runtime/x86-assem.S =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/runtime/x86-assem.S,v retrieving revision 1.13 diff -u -r1.13 x86-assem.S --- src/runtime/x86-assem.S 16 Aug 2003 20:38:40 -0000 1.13 +++ src/runtime/x86-assem.S 4 Apr 2004 23:23:43 -0000 @@ -339,30 +339,6 @@ .byte trap_PendingInterrupt ret .size GNAME(do_pending_interrupt),.-GNAME(do_pending_interrupt) - -#ifdef LISP_FEATURE_GENCGC -/* This is a fast bzero using the FPU. The first argument is the start - * address which needs to be aligned on an 8 byte boundary, the second - * argument is the number of bytes, which must be a nonzero multiple - * of 8 bytes. */ -/* FIXME whether this is still faster than using the OS's bzero or - * equivalent, we don't know */ - .text - .globl GNAME(i586_bzero) - .type GNAME(i586_bzero),@function - .align align_4byte,0x90 -GNAME(i586_bzero): - movl 4(%esp),%edx # Load the start address. - movl 8(%esp),%eax # Load the number of bytes. - fldz -l1: fstl 0(%edx) - addl $8,%edx - subl $8,%eax - jnz l1 - fstp %st(0) - ret - .size GNAME(i586_bzero),.-GNAME(i586_bzero) -#endif /* |