From: Juho Snellman <jsnell@us...>  20070206 04:48:47

Update of /cvsroot/sbcl/sbcl/src/compiler In directory sc8prcvs8.sourceforge.net:/tmp/cvsserv21212/src/compiler Modified Files: sset.lisp Log Message: 1.0.2.12: New hashbased implementation of ssets * The old version that used sorted lists had bad worst case performance, which was especially noticeable with constraint propagation on hairy functions. * Use yet another custom hash implementation (with open addressing and double hashing), since the standard hashtables are too heavy for this (e.g. locking overhead, memory consumption). * An sset implementation based on balanced trees was also tested, but in practice turned out to be even slower than the sorted lists, due to the high * DOSSETELEMENTS no longer iterates in SSETELEMENTNUMBER order, but we don't seem to rely on the old behaviour anywhere. Index: sset.lisp =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/compiler/sset.lisp,v retrieving revision 1.9 retrieving revision 1.10 diff u d r1.9 r1.10  sset.lisp 14 Jul 2005 18:57:02 0000 1.9 +++ sset.lisp 6 Feb 2007 04:48:36 0000 1.10 @@ 1,8 +1,12 @@ ;;;; This file implements a sparse set abstraction, represented as a ;;;; sorted linked list. We don't use bitvectors to represent sets in ;;;; flow analysis, since the universe may be quite large but the ;;;; average number of elements is small. We keep the list sorted so ;;;; that we can do union and intersection in linear time. +;;;; custom lightweight hashtable. We don't use bitvectors to +;;;; represent sets in flow analysis, since the universe may be quite +;;;; large but the average number of elements is small. We also don't +;;;; use sorted lists like in the original CMUCL code, since it had +;;;; bad worstcase performance (on some reallife programs the +;;;; hashbased sset gives a 20% compilation speedup). A custom +;;;; hashtable is used since the standard one is too heavy (locking, +;;;; memory use) for this use. ;;;; This software is part of the SBCL system. See the README file for ;;;; more information. @@ 11,7 +15,7 @@ ;;;; written at Carnegie Mellon University and released into the ;;;; public domain. The software is in the public domain and is ;;;; provided with absolutely no warranty. See the COPYING and CREDITS ;;;; files for more information. +;;;; files for more information. (This file no) (inpackage "SB!C") @@ 24,68 +28,168 @@ (number nil :type (or index null))) (defstruct (sset (:copier nil))  ;; The element at the head of the list here seems always to be  ;; ignored. I think this idea is that the extra level of indirection  ;; it provides is handy to allow various destructive operations on  ;; SSETs to be expressed more easily.  WHN  (elements (list nil) :type cons)) (defprinter (sset)  (elements :prin1 (cdr elements))) + ;; Vector containing the set values. 0 is used for empty (since + ;; initializing a vector with 0 is cheaper than with NIL), +DELETED+ + ;; is used to mark buckets that used to contain an element, but no + ;; longer do. + (vector #() :type simplevector) + ;; How many buckets currently contain or used to contain an element. + (free 0 :type index) + ;; How many elements are currently members of the set. + (count 0 :type index)) +(defprinter (sset) vector) ;;; Iterate over the elements in SSET, binding VAR to each element in ;;; turn. (defmacro dossetelements ((var sset &optional result) &body body)  `(dolist (,var (cdr (ssetelements ,sset)) ,result) ,@body)) + `(loop for ,var across (ssetvector ,sset) + do (unless (member ,var '(0 +deleted+)) + ,@body) + finally (return ,result))) + +;;; Primary hash. +(declaim (inline ssethash1)) +(defun ssethash1 (element) + #+sbxchost + (let ((result (ssetelementnumber element))) + ;; This is performance critical, and it's not certain that the host + ;; compiler does modular arithmetic optimization. Instad use + ;; something that most CL implementations will do efficiently. + (the fixnum (logxor (the fixnum result) + (the fixnum (ash result 9)) + (the fixnum (ash result 5))))) + #sbxchost + (let ((result (ssetelementnumber element))) + (declare (type sb!vm:word result)) + ;; We only use the loworder bits. + (macrolet ((setresult (form) + `(setf result (ldb (byte #.sb!vm:nwordbits 0) ,form)))) + (setresult (+ result (ash result 19))) + (setresult (logxor result (ash result 13))) + (setresult (+ result (ash result 9))) + (setresult (logxor result (ash result 5))) + (setresult (+ result (ash result 2))) + (logand sb!xc:mostpositivefixnum result)))) + +;;; Secondary hash (for double hash probing). Needs to return an odd +;;; number. +(declaim (inline ssethash2)) +(defun ssethash2 (element) + (let ((number (ssetelementnumber element))) + (declare (fixnum number)) + (logior 1 number))) + +;;; Double the size of the hash vector of SET. +(defun ssetgrow (set) + (let* ((vector (ssetvector set)) + (newvector (makearray (if (zerop (length vector)) + 2 + (* (length vector) 2))))) + (setf (ssetvector set) newvector + (ssetfree set) (length newvector) + (ssetcount set) 0) + (loop for element across vector + do (unless (member element '(0 +deleted+)) + (ssetadjoin element set))))) + +;;; Rehash the sset when the proportion of free cells in the set is +;;; lower than this. +(defconstant +ssetrehashthreshold+ 1/4) ;;; Destructively add ELEMENT to SET. If ELEMENT was not in the set, ;;; then we return true, otherwise we return false. (declaim (ftype (sfunction (ssetelement sset) boolean) ssetadjoin)) (defun ssetadjoin (element set)  (let ((number (ssetelementnumber element))  (elements (ssetelements set)))  (do ((prev elements current)  (current (cdr elements) (cdr current)))  ((null current)  (setf (cdr prev) (list element))  t)  (let ((el (car current)))  (when (>= (ssetelementnumber el) number)  (when (eq el element)  (return nil))  (setf (cdr prev) (cons element current))  (return t)))))) + (declare (optimize (speed 2))) + (when (<= (ssetfree set) + (max 1 (truncate (length (ssetvector set)) + #.(round (/ +ssetrehashthreshold+))))) + (ssetgrow set)) + (loop with vector = (ssetvector set) + with mask oftype fixnum = (1 (length vector)) + with secondaryhash = (ssethash2 element) + for hash oftype index = (logand mask (ssethash1 element)) then + (logand mask (+ hash secondaryhash)) + for current = (aref vector hash) + for deletedindex = nil + do (cond ((eql current 0) + (incf (ssetcount set)) + (cond (deletedindex + (setf (aref vector deletedindex) element)) + (t + (decf (ssetfree set)) + (setf (aref vector hash) element))) + (return t)) + ((and (eql current '+deleted+) + (not deletedindex)) + (setf deletedindex hash)) + ((eq current element) + (return nil))))) ;;; Destructively remove ELEMENT from SET. If element was in the set, ;;; then return true, otherwise return false. (declaim (ftype (sfunction (ssetelement sset) boolean) ssetdelete)) (defun ssetdelete (element set)  (let ((elements (ssetelements set)))  (do ((prev elements current)  (current (cdr elements) (cdr current)))  ((null current) nil)  (when (eq (car current) element)  (setf (cdr prev) (cdr current))  (return t))))) + (when (zerop (length (ssetvector set))) + (returnfrom ssetdelete nil)) + (loop with vector = (ssetvector set) + with mask fixnum = (1 (length vector)) + with secondaryhash = (ssethash2 element) + for hash oftype index = (logand mask (ssethash1 element)) then + (logand mask (+ hash secondaryhash)) + for current = (aref vector hash) + do (cond ((eql current 0) + (return nil)) + ((eq current element) + (decf (ssetcount set)) + (setf (aref vector hash) '+deleted+) + (return t))))) ;;; Return true if ELEMENT is in SET, false otherwise. (declaim (ftype (sfunction (ssetelement sset) boolean) ssetmember)) (defun ssetmember (element set)  (declare (inline member))  (not (null (member element (cdr (ssetelements set)) :test #'eq)))) + (when (zerop (length (ssetvector set))) + (returnfrom ssetmember nil)) + (loop with vector = (ssetvector set) + with mask fixnum = (1 (length vector)) + with secondaryhash = (ssethash2 element) + for hash oftype index = (logand mask (ssethash1 element)) then + (logand mask (+ hash secondaryhash)) + for current = (aref vector hash) + do (cond ((eql current 0) + (return nil)) + ((eq current element) + (return t))))) (declaim (ftype (sfunction (sset sset) boolean) sset=)) (defun sset= (set1 set2)  (equal (ssetelements set1) (ssetelements set2))) + (unless (eql (ssetcount set1) + (ssetcount set2)) + (returnfrom sset= nil)) + (dossetelements (element set1) + (unless (ssetmember element set2) + (returnfrom sset= nil))) + t) ;;; Return true if SET contains no elements, false otherwise. (declaim (ftype (sfunction (sset) boolean) ssetempty)) (defun ssetempty (set)  (null (cdr (ssetelements set)))) + (zerop (ssetcount set))) ;;; Return a new copy of SET. (declaim (ftype (sfunction (sset) sset) copysset)) (defun copysset (set)  (makesset :elements (copylist (ssetelements set)))) + (makesset :vector (let* ((vector (ssetvector set)) + (newvector (makearray (length vector)))) + (declare (type simplevector vector newvector) + (optimize speed (safety 0))) + ;; There's no REPLACE deftransform for simplevectors. + (dotimes (i (length vector)) + (setf (aref newvector i) + (aref vector i))) + newvector) + :count (ssetcount set) + :free (ssetfree set))) ;;; Perform the appropriate set operation on SET1 and SET2 by ;;; destructively modifying SET1. We return true if SET1 was modified, @@ 93,127 +197,42 @@ (declaim (ftype (sfunction (sset sset) boolean) ssetunion ssetintersection ssetdifference)) (defun ssetunion (set1 set2)  (let* ((prevel1 (ssetelements set1))  (el1 (cdr prevel1))  (changed nil))  (do ((el2 (cdr (ssetelements set2)) (cdr el2)))  ((null el2) changed)  (let* ((e (car el2))  (num2 (ssetelementnumber e)))  (loop  (when (null el1)  (setf (cdr prevel1) (copylist el2))  (returnfrom ssetunion t))  (let ((num1 (ssetelementnumber (car el1))))  (when (>= num1 num2)  (if (> num1 num2)  (let ((new (cons e el1)))  (setf (cdr prevel1) new)  (setq prevel1 new  changed t))  (shiftf prevel1 el1 (cdr el1)))  (return))  (shiftf prevel1 el1 (cdr el1)))))))) + (loop with modified = nil + for element across (ssetvector set2) + do (unless (member element '(0 +deleted+)) + (when (ssetadjoin element set1) + (setf modified t))) + finally (return modified))) (defun ssetintersection (set1 set2)  (let* ((prevel1 (ssetelements set1))  (el1 (cdr prevel1))  (changed nil))  (do ((el2 (cdr (ssetelements set2)) (cdr el2)))  ((null el2)  (cond (el1  (setf (cdr prevel1) nil)  t)  (t changed)))  (let ((num2 (ssetelementnumber (car el2))))  (loop  (when (null el1)  (returnfrom ssetintersection changed))  (let ((num1 (ssetelementnumber (car el1))))  (when (>= num1 num2)  (when (= num1 num2)  (shiftf prevel1 el1 (cdr el1)))  (return))  (pop el1)  (setf (cdr prevel1) el1)  (setq changed t))))))) + (loop with modified = nil + for element across (ssetvector set1) + for index oftype index from 0 + do (unless (member element '(0 +deleted+)) + (unless (ssetmember element set2) + (decf (ssetcount set1)) + (setf (aref (ssetvector set1) index) '+deleted+ + modified t))) + finally (return modified))) (defun ssetdifference (set1 set2)  (let* ((prevel1 (ssetelements set1))  (el1 (cdr prevel1))  (changed nil))  (do ((el2 (cdr (ssetelements set2)) (cdr el2)))  ((null el2) changed)  (let ((num2 (ssetelementnumber (car el2))))  (loop  (when (null el1)  (returnfrom ssetdifference changed))  (let ((num1 (ssetelementnumber (car el1))))  (when (>= num1 num2)  (when (= num1 num2)  (pop el1)  (setf (cdr prevel1) el1)  (setq changed t))  (return))  (shiftf prevel1 el1 (cdr el1)))))))) + (loop with modified = nil + for element across (ssetvector set1) + for index oftype index from 0 + do (unless (member element '(0 +deleted+)) + (when (ssetmember element set2) + (decf (ssetcount set1)) + (setf (aref (ssetvector set1) index) '+deleted+ + modified t))) + finally (return modified))) ;;; Destructively modify SET1 to include its union with the difference ;;; of SET2 and SET3. We return true if SET1 was modified, false ;;; otherwise. (declaim (ftype (sfunction (sset sset sset) boolean) ssetunionofdifference)) (defun ssetunionofdifference (set1 set2 set3)  (let* ((prevel1 (ssetelements set1))  (el1 (cdr prevel1))  (el3 (cdr (ssetelements set3)))  (changed nil))  (do ((el2 (cdr (ssetelements set2)) (cdr el2)))  ((null el2) changed)  (let* ((e (car el2))  (num2 (ssetelementnumber e)))  (loop  (when (null el3)  (loop  (when (null el1)  (setf (cdr prevel1) (copylist el2))  (returnfrom ssetunionofdifference t))  (let ((num1 (ssetelementnumber (car el1))))  (when (>= num1 num2)  (if (> num1 num2)  (let ((new (cons e el1)))  (setf (cdr prevel1) new)  (setq prevel1 new changed t))  (shiftf prevel1 el1 (cdr el1)))  (return))  (shiftf prevel1 el1 (cdr el1))))  (return))  (let ((num3 (ssetelementnumber (car el3))))  (when (<= num2 num3)  (unless (= num2 num3)  (loop  (when (null el1)  (do ((el2 el2 (cdr el2)))  ((null el2)  (returnfrom ssetunionofdifference changed))  (let* ((e (car el2))  (num2 (ssetelementnumber e)))  (loop  (when (null el3)  (setf (cdr prevel1) (copylist el2))  (returnfrom ssetunionofdifference t))  (setq num3 (ssetelementnumber (car el3)))  (when (<= num2 num3)  (unless (= num2 num3)  (let ((new (cons e el1)))  (setf (cdr prevel1) new)  (setq prevel1 new changed t)))  (return))  (pop el3)))))  (let ((num1 (ssetelementnumber (car el1))))  (when (>= num1 num2)  (if (> num1 num2)  (let ((new (cons e el1)))  (setf (cdr prevel1) new)  (setq prevel1 new changed t))  (shiftf prevel1 el1 (cdr el1)))  (return))  (shiftf prevel1 el1 (cdr el1)))))  (return)))  (pop el3)))))) + (loop with modified = nil + for element across (ssetvector set2) + do (unless (member element '(0 +deleted+)) + (unless (ssetmember element set3) + (when (ssetadjoin element set1) + (setf modified t)))) + finally (return modified))) 