From: Douglas K. <sn...@us...> - 2014-11-14 01:36:51
|
The branch "master" has been updated in SBCL: via 06ccf13ed3eb4a2b0574ec610eeb51cb40f375c3 (commit) from a1dfa892824adb3331f007bf0cb8c9f05b0afc5e (commit) - Log ----------------------------------------------------------------- commit 06ccf13ed3eb4a2b0574ec610eeb51cb40f375c3 Author: Douglas Katzman <do...@go...> Date: Thu Nov 13 20:24:45 2014 -0500 Ensure that all interned symbols have a precomputed hash. --- package-data-list.lisp-expr | 1 + src/code/package.lisp | 11 ++- src/code/sxhash.lisp | 9 +-- src/code/symbol.lisp | 38 +++++++++- src/code/target-package.lisp | 146 +++++++++++++++++++----------------- src/code/target-sxhash.lisp | 5 - src/compiler/generic/vm-fndb.lisp | 3 +- src/compiler/x86-64/cell.lisp | 2 +- src/pcl/slots-boot.lisp | 6 +- tests/info.impure.lisp | 2 +- tests/packages.impure.lisp | 2 +- 11 files changed, 132 insertions(+), 93 deletions(-) diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 0e8569e..5172d7e 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1567,6 +1567,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "FLOAT-WAIT" "DYNAMIC-SPACE-FREE-POINTER" "DYNAMIC-USAGE" "EFFECTIVE-FIND-POSITION-TEST" "EFFECTIVE-FIND-POSITION-KEY" + "ENSURE-SYMBOL-HASH" "ENSURE-SYMBOL-TLS-INDEX" "ERROR-NUMBER-OR-LOSE" "EXTENDED-CHAR-P" "EXTERNAL-FORMAT-DESIGNATOR" diff --git a/src/code/package.lisp b/src/code/package.lisp index ffe470a..e964ba2 100644 --- a/src/code/package.lisp +++ b/src/code/package.lisp @@ -35,10 +35,10 @@ (def!struct (package-hashtable (:constructor %make-package-hashtable - (table size &aux (free size))) + (cells size &aux (free size))) (:copier nil)) ;; The general-vector of symbols, with a hash-vector in its last cell. - (table (missing-arg) :type simple-vector) + (cells (missing-arg) :type simple-vector) ;; The total number of entries allowed before resizing. ;; ;; FIXME: CAPACITY would be a more descriptive name. (This is @@ -167,6 +167,9 @@ (values (logior (ash access-types 3) #b11) 0 #() (package-listify pkg-designator-list))) +(declaim (inline pkg-symbol-valid-p)) +(defun pkg-symbol-valid-p (x) (not (fixnump x))) + ;; The STATE parameter is comprised of 4 packed fields ;; [0:1] = substate {0=internal,1=external,2=inherited,3=initial} ;; [2] = package with inherited symbols has shadowing symbols @@ -220,7 +223,7 @@ (this-package () (truly-the sb!xc:package (car pkglist))) (start (next-state new-table) - (let ((symbols (package-hashtable-table new-table))) + (let ((symbols (package-hashtable-cells new-table))) (package-iter-step (logior (mask-field (byte 3 3) start-state) next-state) ;; assert that physical length was nonzero @@ -232,7 +235,7 @@ (macrolet ((scan (&optional (guard t)) `(loop (let ((sym (aref sym-vec (decf index)))) - (when (and (not (eql sym 0)) ,guard) + (when (and (pkg-symbol-valid-p sym) ,guard) (return (values start-state index sym-vec pkglist sym (aref #(:internal :external :inherited) (logand start-state 3)))))) diff --git a/src/code/sxhash.lisp b/src/code/sxhash.lisp index 6b09f00..d82bb79 100644 --- a/src/code/sxhash.lisp +++ b/src/code/sxhash.lisp @@ -118,14 +118,7 @@ ;; SYMBOL-HASH (which contains NIL itself) is a negative ;; fixnum. (if (= 0 result) - (let ((sxhash (%sxhash-simple-string (symbol-name x)))) - ;; We could do a (logior sxhash #x10000000) to - ;; ensure that we never store a 0 in the - ;; slot. However, it's such an unlikely event - ;; (1/5e8?) that it makes more sense to optimize for - ;; the common case... - (%set-symbol-hash x sxhash) - sxhash) + (ensure-symbol-hash x) result))))) (deftransform psxhash ((x &optional depthoid) (character &optional t)) diff --git a/src/code/symbol.lisp b/src/code/symbol.lisp index cafea98..43bf930 100644 --- a/src/code/symbol.lisp +++ b/src/code/symbol.lisp @@ -73,7 +73,43 @@ distinct from the global value. Can also be SETF." (%makunbound symbol) symbol)) -;;; Return the built-in hash value for SYMBOL. +;; Compute a symbol's hash. Also used by FIND-SYMBOL which requires that a hash +;; be a pure function of the name and not a semi-opaque property of the symbol. +;; The hash of all symbols named "NIL" must be the same, so not to pessimize +;; FIND-SYMBOL by special-casing the finding of CL:NIL with an extra "or" +;; in the hash-equality test. i.e. We can't recognize that CL:NIL was the +;; object sought (having an exceptional hash) until it has been found. +(defun compute-symbol-hash (string length) + (declare (simple-string string) (index length)) + (if (and (= length 3) + (locally + ;; SXHASH-SUBSTRING is unsafe, so this is too. but do we know that + ;; length is ok, or is it an accident that it can scan too far? + (declare (optimize (safety 0))) + (string-dispatch (simple-base-string (simple-array character (*))) + string + (and (char= (schar string 0) #\N) + (char= (schar string 1) #\I) + (char= (schar string 2) #\L))))) + ;; FIXME: hardwire this. See similar comment at + ;; (deftransform sxhash ((x) (symbol)) + (return-from compute-symbol-hash (symbol-hash nil))) + ;; And make a symbol's hash not the same as (sxhash name) in general. + (let ((sxhash (logand (lognot (%sxhash-simple-substring string length)) + sb!xc:most-positive-fixnum))) + (if (zerop sxhash) #x55AA sxhash))) ; arbitrary substitute for 0 + +;; Return SYMBOL's hash, a strictly positive fixnum, computing it if not stored. +;; The inlined code for (SXHASH symbol) only calls ENSURE-SYMBOL-HASH if +;; needed, however this is ok to call even if the hash is already nonzero. +(defun ensure-symbol-hash (symbol) + (let ((hash (symbol-hash symbol))) + (if (zerop hash) + (let ((name (symbol-name symbol))) + (%set-symbol-hash symbol (compute-symbol-hash name (length name)))) + hash))) + +;;; Interpreter stub: Return whatever is in the SYMBOL-HASH slot of SYMBOL. (defun symbol-hash (symbol) (symbol-hash symbol)) diff --git a/src/code/target-package.lisp b/src/code/target-package.lisp index 697c64f..b460ef2 100644 --- a/src/code/target-package.lisp +++ b/src/code/target-package.lisp @@ -70,12 +70,17 @@ (def!method print-object ((table package-hashtable) stream) (declare (type stream stream)) - (print-unreadable-object (table stream :type t) - (format stream - ":SIZE ~S :FREE ~S :DELETED ~S" - (package-hashtable-size table) - (package-hashtable-free table) - (package-hashtable-deleted table)))) + (print-unreadable-object (table stream :type t :identity t) + (let* ((n-live (%package-hashtable-symbol-count table)) + (n-deleted (package-hashtable-deleted table)) + (n-filled (+ n-live n-deleted)) + (n-cells (1- (length (package-hashtable-cells table))))) + (format stream + "(~D+~D)/~D [~@[~,3f words/sym,~]load=~,1f%]" + n-live n-deleted n-cells + (unless (zerop n-live) + (/ (* (1+ (/ sb!vm:n-word-bytes)) n-cells) n-live)) + (* 100 (/ n-filled n-cells)))))) ;;; the maximum load factor we allow in a package hashtable (!defparameter *package-rehash-threshold* 3/4) @@ -109,14 +114,14 @@ ;;; Destructively resize TABLE to have room for at least SIZE entries ;;; and rehash its existing entries. (defun resize-package-hashtable (table size) - (let* ((symvec (package-hashtable-table table)) + (let* ((symvec (package-hashtable-cells table)) (len (1- (length symvec))) (temp-table (make-package-hashtable size))) (dotimes (i len) (let ((sym (svref symvec i))) - (unless (eql sym 0) + (when (pkg-symbol-valid-p sym) (add-symbol temp-table sym)))) - (setf (package-hashtable-table table) (package-hashtable-table temp-table) + (setf (package-hashtable-cells table) (package-hashtable-cells temp-table) (package-hashtable-size table) (package-hashtable-size temp-table) (package-hashtable-free table) (package-hashtable-free temp-table) (package-hashtable-deleted table) 0))) @@ -584,19 +589,15 @@ REMOVE-PACKAGE-LOCAL-NICKNAME, and the DEFPACKAGE option :LOCAL-NICKNAMES." ;;;; operations on package hashtables -;;; Compute a number from the sxhash of the pname and the length which -;;; must be between 2 and 255. -(defmacro entry-hash (length sxhash) - `(the fixnum - (+ (the fixnum - (rem (the fixnum - (logxor ,length - ,sxhash - (the fixnum (ash ,sxhash -8)) - (the fixnum (ash ,sxhash -16)) - (the fixnum (ash ,sxhash -19)))) - 254)) - 2))) +;;; Compute a number between 1 and 255 based on the sxhash of the +;;; pname and the length thereof. +(declaim (inline entry-hash)) +(defun entry-hash (length sxhash) + (declare (index length) ((and fixnum unsigned-byte) sxhash)) + (1+ (rem (logxor length sxhash + (ash sxhash -8) (ash sxhash -16) (ash sxhash -19)) + 255))) + ;;; FIXME: should be wrapped in EVAL-WHEN (COMPILE EXECUTE) ;;; Add a symbol to a package hashtable. The symbol is assumed @@ -611,24 +612,23 @@ REMOVE-PACKAGE-LOCAL-NICKNAME, and the DEFPACKAGE option :LOCAL-NICKNAMES." (* (- (package-hashtable-size table) (package-hashtable-deleted table)) 2))) - (let* ((symvec (package-hashtable-table table)) + (let* ((symvec (package-hashtable-cells table)) (len (1- (length symvec))) (hashvec (the hash-vector (aref symvec len))) - (sxhash (%sxhash-simple-string (symbol-name symbol))) + (sxhash (truly-the fixnum (ensure-symbol-hash symbol))) (h2 (1+ (rem sxhash (- len 2))))) (declare (fixnum sxhash h2)) (do ((i (rem sxhash len) (rem (+ i h2) len))) - ((< (aref hashvec i) 2) - (if (eql (aref hashvec i) 0) + ((eql (aref hashvec i) 0) + (if (eql (svref symvec i) 0) (decf (package-hashtable-free table)) (decf (package-hashtable-deleted table))) ;; This order of these two SETFs does not matter. - ;; A symbol cell of 0 is skipped on lookup if the hash cell - ;; matches something accidentally. + ;; An empty symbol cell is skipped on lookup if the hash cell + ;; matches something accidentally. "empty" = any fixnum. (setf (svref symvec i) symbol) (setf (aref hashvec i) - (entry-hash (length (symbol-name symbol)) - sxhash))) + (entry-hash (length (symbol-name symbol)) sxhash))) (declare (fixnum i))))) ;;; Resize the package hashtables of all packages so that their load @@ -655,9 +655,8 @@ REMOVE-PACKAGE-LOCAL-NICKNAME, and the DEFPACKAGE option :LOCAL-NICKNAMES." (defmacro with-symbol (((symbol-var &optional (index-var (gensym))) table string length sxhash entry-hash) &body forms) - (let ((vec (gensym)) (hash-vec (gensym)) (len (gensym)) (h2 (gensym)) - (name (gensym)) (ehash (gensym))) - `(let* ((,vec (package-hashtable-table ,table)) + (with-unique-names (vec len hash-vec h2 probed-ehash name) + `(let* ((,vec (package-hashtable-cells ,table)) (,len (1- (length ,vec))) (,hash-vec (the hash-vector (svref ,vec ,len))) (,index-var (rem (the hash ,sxhash) ,len)) @@ -665,23 +664,28 @@ REMOVE-PACKAGE-LOCAL-NICKNAME, and the DEFPACKAGE option :LOCAL-NICKNAMES." (the index (- ,len 2))))))) (declare (type index ,len ,h2 ,index-var)) (loop - (let ((,ehash (aref ,hash-vec ,index-var))) - (cond ((eql ,ehash ,entry-hash) - (let ((,symbol-var (svref ,vec ,index-var))) - (unless (eql ,symbol-var 0) - (let ((,name (symbol-name (truly-the symbol ,symbol-var)))) - (when (and (= (length ,name) ,length) - (string= ,string ,name - :end1 ,length :end2 ,length)) - (return (progn ,@forms))))))) - ((zerop ,ehash) - (return)))) - (setq ,index-var (+ ,index-var ,h2)) - (when (>= ,index-var ,len) - (setq ,index-var (- ,index-var ,len))))))) + (let ((,probed-ehash (aref ,hash-vec ,index-var))) + (cond + ((eql ,probed-ehash ,entry-hash) + (let ((,symbol-var (truly-the symbol (svref ,vec ,index-var)))) + (when (eq (symbol-hash ,symbol-var) ,sxhash) + (let ((,name (symbol-name ,symbol-var))) + ;; The pre-test for length is kind of an unimportant + ;; optimization, but passing it for both :end arguments + ;; requires that it be within bounds for the probed symbol. + (when (and (= (length ,name) ,length) + (string= ,string ,name + :end1 ,length :end2 ,length)) + (return (progn ,@forms))))))) + ((eql ,probed-ehash 0) + ;; either a never used cell or a tombstone left by UNINTERN + (when (eql (svref ,vec ,index-var) 0) ; really never used + (return))))) + (when (>= (incf ,index-var ,h2) ,len) + (decf ,index-var ,len)))))) ;;; Delete the entry for STRING in TABLE. The entry must exist. -;;; Deletion stores 0 for the symbol and 1 for the hash tombstone. +;;; Deletion stores -1 for the symbol and 0 for the hash tombstone. ;;; Storing NIL for the symbol, as used to be done, is vulnerable to a rare ;;; concurrency bug because many strings have the same ENTRY-HASH as NIL: ;;; (entry-hash 3 (sxhash "NIL")) => 177 and @@ -704,20 +708,20 @@ REMOVE-PACKAGE-LOCAL-NICKNAME, and the DEFPACKAGE option :LOCAL-NICKNAMES." ;;; check is required, since a symbol is both its key and value, ;;; and the "absence of a symbol" marker is never mistaken for a symbol. ;;; -(defun nuke-symbol (table string) - (declare (simple-string string)) - (let* ((length (length string)) - (hash (%sxhash-simple-string string)) +(defun nuke-symbol (table symbol) + (let* ((string (symbol-name symbol)) + (length (length string)) + (hash (symbol-hash symbol)) (ehash (entry-hash length hash))) (declare (type index length) (type hash hash)) (with-symbol ((symbol index) table string length hash ehash) ;; It is suboptimal to grab the vectors again, but not broken, ;; because we have exclusive use of the table for writing. - (let* ((symvec (package-hashtable-table table)) + (let* ((symvec (package-hashtable-cells table)) (hashvec (the hash-vector (aref symvec (1- (length symvec)))))) - (setf (aref hashvec index) 1) - (setf (aref symvec index) 0)) + (setf (aref hashvec index) 0) + (setf (aref symvec index) -1)) ; any nonzero fixnum will do (incf (package-hashtable-deleted table)))) ;; If the table is less than one quarter full, halve its size and ;; rehash the entries. @@ -926,7 +930,7 @@ implementation it is ~S." *default-package-use-list*) (with-package-names (names) (maphash (lambda (k v) (declare (ignore k)) - (pushnew v res)) + (pushnew v res :test #'eq)) names)) res)) @@ -976,7 +980,7 @@ implementation it is ~S." *default-package-use-list*) ;;; If the symbol named by the first LENGTH characters of NAME doesn't exist, ;;; then create it, special-casing the keyword package. (defun intern* (name length package &key no-copy) - (declare (simple-string name)) + (declare (simple-string name) (index length)) (multiple-value-bind (symbol where) (find-symbol* name length package) (cond (where (values symbol where)) @@ -1006,13 +1010,13 @@ implementation it is ~S." *default-package-use-list*) (with-single-package-locked-error (:package package "interning ~A" symbol-name) (let ((symbol (make-symbol symbol-name))) + (add-symbol (cond ((eq package *keyword-package*) + (%set-symbol-value symbol symbol) + (package-external-symbols package)) + (t + (package-internal-symbols package))) + symbol) (%set-symbol-package symbol package) - (cond - ((eq package *keyword-package*) - (%set-symbol-value symbol symbol) - (add-symbol (package-external-symbols package) symbol)) - (t - (add-symbol (package-internal-symbols package) symbol))) (values symbol nil)))))))))) ;;; Check internal and external symbols, then scan down the list @@ -1020,7 +1024,7 @@ implementation it is ~S." *default-package-use-list*) (defun find-symbol* (string length package) (declare (simple-string string) (type index length)) - (let* ((hash (%sxhash-simple-substring string length)) + (let* ((hash (compute-symbol-hash string length)) (ehash (entry-hash length hash))) (declare (type hash hash ehash)) (with-symbol ((symbol) (package-internal-symbols package) @@ -1051,10 +1055,12 @@ implementation it is ~S." *default-package-use-list*) ;;; Return the symbol and T if found, otherwise two NILs. ;;; This is used for fast name-conflict checking in this file and symbol ;;; printing in the printer. +;;; An optimization is possible here: by accepting either a string or symbol, +;;; if the symbol's hash slot is nonzero, we can avoid COMPUTE-SYMBOL-HASH. (defun find-external-symbol (string package) (declare (simple-string string)) (let* ((length (length string)) - (hash (%sxhash-simple-string string)) + (hash (compute-symbol-hash string length)) (ehash (entry-hash length hash))) (declare (type index length) (type hash hash)) @@ -1215,7 +1221,9 @@ uninterned." (let ((cset ())) (dolist (p (package-%use-list package)) (multiple-value-bind (s w) (find-external-symbol name p) - (when w (pushnew s cset)))) + ;; S should be derived as SYMBOL so that PUSHNEW can assume #'EQ + ;; as the test, but it's not happening, so restate the obvious. + (when w (pushnew s cset :test #'eq)))) (when (cdr cset) (apply #'name-conflict package 'unintern symbol cset) (return-from unintern t))) @@ -1228,7 +1236,7 @@ uninterned." (nuke-symbol (if (eq w :internal) (package-internal-symbols package) (package-external-symbols package)) - name) + symbol) (if (eq (symbol-package symbol) package) (%set-symbol-package symbol nil)) t) @@ -1306,7 +1314,7 @@ uninterned." (external (package-external-symbols package))) (dolist (sym syms) (add-symbol external sym) - (nuke-symbol internal (symbol-name sym))))) + (nuke-symbol internal sym)))) t))) ;;; Check that all symbols are accessible, then move from external to internal. @@ -1333,7 +1341,7 @@ uninterned." (external (package-external-symbols package))) (dolist (sym syms) (add-symbol internal sym) - (nuke-symbol external (symbol-name sym))))) + (nuke-symbol external sym)))) t))) ;;; Check for name conflict caused by the import and let the user diff --git a/src/code/target-sxhash.lisp b/src/code/target-sxhash.lisp index 6ebc113..fe9ad77 100644 --- a/src/code/target-sxhash.lisp +++ b/src/code/target-sxhash.lisp @@ -173,11 +173,6 @@ (declaim (ftype (sfunction (integer) hash) sxhash-bignum)) (declaim (ftype (sfunction (t) hash) sxhash-instance)) -;; FIXME: a feature of this implementation is (SXHASH "FOO") = (SXHASH 'FOO) -;; but that is not a requirement. (in fact it is false of "NIL" and NIL) -;; A consequence is that tables of symbols and/or strings, or more -;; complex structures such as (FOO "X") and ("FOO" X), -;; have more collisions than they should. (defun sxhash (x) ;; profiling SXHASH is hard, but we might as well try to make it go ;; fast, in case it is the bottleneck somewhere. -- CSR, 2003-03-14 diff --git a/src/compiler/generic/vm-fndb.lisp b/src/compiler/generic/vm-fndb.lisp index 3faa13b..ab77ccf 100644 --- a/src/compiler/generic/vm-fndb.lisp +++ b/src/compiler/generic/vm-fndb.lisp @@ -93,7 +93,8 @@ (defknown %sxhash-simple-string (simple-string) hash (foldable flushable)) -(defknown %sxhash-simple-substring (simple-string index) hash +(defknown (%sxhash-simple-substring compute-symbol-hash) + (simple-string index) hash (foldable flushable)) (defknown symbol-hash (symbol) hash diff --git a/src/compiler/x86-64/cell.lisp b/src/compiler/x86-64/cell.lisp index 2bbaf1b..cf3d819 100644 --- a/src/compiler/x86-64/cell.lisp +++ b/src/compiler/x86-64/cell.lisp @@ -309,7 +309,7 @@ (:result-types positive-fixnum) (:generator 2 ;; The symbol-hash slot of NIL holds NIL because it is also the - ;; cdr slot, so we have to strip off the three low bits to make sure + ;; cdr slot, so we have to zero the fixnum tag bit(s) to make sure ;; it is a fixnum. The lowtag selection magic that is required to ;; ensure this is explained in the comment in objdef.lisp (loadw res symbol symbol-hash-slot other-pointer-lowtag) diff --git a/src/pcl/slots-boot.lisp b/src/pcl/slots-boot.lisp index b64e64e..bc846c9 100644 --- a/src/pcl/slots-boot.lisp +++ b/src/pcl/slots-boot.lisp @@ -622,7 +622,9 @@ (declare (optimize (sb-c::insert-array-bounds-checks 0))) (let* ((vector (layout-slot-table wrapper)) (modulus (truly-the index (svref vector 0))) - (index (rem (sxhash slot-name) modulus)) + ;; Can elide the 'else' branch of (OR symbol-hash ensure-symbol-hash) + ;; because every symbol in the slot-table already got a nonzero hash. + (index (rem (symbol-hash slot-name) modulus)) (probe (svref vector (1+ index)))) (declare (simple-vector vector) (index index)) (cond ((fixnump probe) @@ -644,7 +646,7 @@ (flet ((add-to-vector (name slot) (declare (symbol name) (optimize (sb-c::insert-array-bounds-checks 0))) - (let ((index (rem (sxhash name) n))) + (let ((index (rem (ensure-symbol-hash name) n))) (setf (svref vector index) (acons name (cons (when (or bootstrap diff --git a/tests/info.impure.lisp b/tests/info.impure.lisp index e30a12a..3566328 100644 --- a/tests/info.impure.lisp +++ b/tests/info.impure.lisp @@ -365,7 +365,7 @@ (defun classoid-cell-test-get-lotsa-symbols () (remove-if-not #'symbolp - (package-hashtable-table + (package-hashtable-cells (package-internal-symbols (find-package "SB-C"))))) ;; Make every symbol in the test set have a classoid-cell diff --git a/tests/packages.impure.lisp b/tests/packages.impure.lisp index 695d7b9..84008ed 100644 --- a/tests/packages.impure.lisp +++ b/tests/packages.impure.lisp @@ -727,7 +727,7 @@ if a restart was invoked." ;; (In fact we allow INTERN, but that's beside the point) (with-test (:name :with-package-iterator-and-mutation) (flet ((table-size (pkg) - (length (sb-impl::package-hashtable-table + (length (sb-impl::package-hashtable-cells (sb-impl::package-internal-symbols pkg))))) (let* ((p (make-package (string (gensym)))) (initial-table-size (table-size p)) ----------------------------------------------------------------------- hooks/post-receive -- SBCL |