[403bac]: src / code / target-sxhash.lisp Maximize Restore History

Download this file

target-sxhash.lisp    333 lines (313 with data), 12.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
;;;; hashing functions
;;;; This software is part of the SBCL system. See the README file for
;;;; more information.
;;;;
;;;; This software is derived from the CMU CL system, which was
;;;; written at Carnegie Mellon University and released into the
;;;; public domain. The software is in the public domain and is
;;;; provided with absolutely no warranty. See the COPYING and CREDITS
;;;; files for more information.
(in-package "SB!IMPL")
;;; the depthoid explored when calculating hash values
;;;
;;; "Depthoid" here is a sort of mixture of what Common Lisp ordinarily calls
;;; depth and what Common Lisp ordinarily calls length; it's incremented either
;;; when we descend into a compound object or when we step through elements of
;;; a compound object.
(defconstant +max-hash-depthoid+ 4)
;;;; mixing hash values
;;; a function for mixing hash values
;;;
;;; desiderata:
;;; * Non-commutativity keeps us from hashing e.g. #(1 5) to the
;;; same value as #(5 1), and ending up in real trouble in some
;;; special cases like bit vectors the way that CMUCL 18b SXHASH
;;; does. (Under CMUCL 18b, SXHASH of any bit vector is 1..)
;;; * We'd like to scatter our hash values over the entire possible range
;;; of values instead of hashing small or common key values (like
;;; 2 and NIL and #\a) to small FIXNUMs the way that the CMUCL 18b
;;; SXHASH function does, again helping to avoid pathologies like
;;; hashing all bit vectors to 1.
;;; * We'd like this to be simple and fast, too.
;;;
;;; FIXME: Should this be INLINE?
(declaim (ftype (function ((and fixnum unsigned-byte)
(and fixnum unsigned-byte))
(and fixnum unsigned-byte)) mix))
(defun mix (x y)
;; FIXME: We wouldn't need the nasty (SAFETY 0) here if the compiler
;; were smarter about optimizing ASH. (Without the THE FIXNUM below,
;; and the (SAFETY 0) declaration here to get the compiler to trust
;; it, the sbcl-0.5.0m cross-compiler running under Debian
;; cmucl-2.4.17 turns the ASH into a full call, requiring the
;; UNSIGNED-BYTE 32 argument to be coerced to a bignum, requiring
;; consing, and thus generally obliterating performance.)
(declare (optimize (speed 3) (safety 0)))
(declare (type (and fixnum unsigned-byte) x y))
;; the ideas here:
;; * Bits diffuse in both directions (shifted left by up to 2 places
;; in the calculation of XY, and shifted right by up to 5 places
;; by the ASH).
;; * The #'+ and #'LOGXOR operations don't commute with each other,
;; so different bit patterns are mixed together as they shift
;; past each other.
;; * The arbitrary constant in the #'LOGXOR expression is intended
;; to help break up any weird anomalies we might otherwise get
;; when hashing highly regular patterns.
;; (These are vaguely like the ideas used in many cryptographic
;; algorithms, but we're not pushing them hard enough here for them
;; to be cryptographically strong.)
(let* ((xy (+ (* x 3) y)))
(declare (type (unsigned-byte 32) xy))
(the (and fixnum unsigned-byte)
(logand most-positive-fixnum
(logxor 441516657
xy
(the fixnum (ash xy -5)))))))
;;;; hashing strings
;;;;
;;;; Note that this operation is used in compiler symbol table lookups, so we'd
;;;; like it to be fast.
#!-sb-fluid (declaim (inline %sxhash-substring))
(defun %sxhash-substring (string &optional (count (length string)))
;; FIXME: As in MIX above, we wouldn't need (SAFETY 0) here if the
;; cross-compiler were smarter about ASH, but we need it for sbcl-0.5.0m.
(declare (optimize (speed 3) (safety 0)))
(declare (type string string))
(declare (type index count))
(let ((result 408967240))
(declare (type fixnum result))
(unless (typep string '(vector nil))
(dotimes (i count)
(declare (type index i))
(mixf result
(the fixnum
(ash (char-code (aref string i)) 5)))))
result))
;;; test:
;;; (let ((ht (make-hash-table :test 'equal)))
;;; (do-all-symbols (symbol)
;;; (let* ((string (symbol-name symbol))
;;; (hash (%sxhash-substring string)))
;;; (if (gethash hash ht)
;;; (unless (string= (gethash hash ht) string)
;;; (format t "collision: ~S ~S~%" string (gethash hash ht)))
;;; (setf (gethash hash ht) string))))
;;; (format t "final count=~W~%" (hash-table-count ht)))
(defun %sxhash-simple-string (x)
(declare (optimize speed))
(declare (type simple-string x))
(%sxhash-substring x))
(defun %sxhash-simple-substring (x count)
(declare (optimize speed))
(declare (type simple-string x))
(declare (type index count))
(%sxhash-substring x count))
;;;; the SXHASH function
(defun sxhash (x)
;; profiling SXHASH is hard, but we might as well try to make it go
;; fast, in case it is the bottleneck somwhere. -- CSR, 2003-03-14
(declare (optimize speed))
(labels ((sxhash-number (x)
(etypecase x
(fixnum (sxhash x)) ; through DEFTRANSFORM
(integer (sb!bignum:sxhash-bignum x))
(single-float (sxhash x)) ; through DEFTRANSFORM
(double-float (sxhash x)) ; through DEFTRANSFORM
#!+long-float (long-float (error "stub: no LONG-FLOAT"))
(ratio (let ((result 127810327))
(declare (type fixnum result))
(mixf result (sxhash-number (numerator x)))
(mixf result (sxhash-number (denominator x)))
result))
(complex (let ((result 535698211))
(declare (type fixnum result))
(mixf result (sxhash-number (realpart x)))
(mixf result (sxhash-number (imagpart x)))
result))))
(sxhash-recurse (x &optional (depthoid +max-hash-depthoid+))
(declare (type index depthoid))
(typecase x
(cons
(if (plusp depthoid)
(mix (sxhash-recurse (car x) (1- depthoid))
(sxhash-recurse (cdr x) (1- depthoid)))
261835505))
(instance
(if (or (typep x 'structure-object) (typep x 'condition))
(logxor 422371266
(sxhash ; through DEFTRANSFORM
(classoid-name
(layout-classoid (%instance-layout x)))))
(sxhash-instance x)))
(symbol (sxhash x)) ; through DEFTRANSFORM
(array
(typecase x
(simple-string (sxhash x)) ; through DEFTRANSFORM
(string (%sxhash-substring x))
(simple-bit-vector (sxhash x)) ; through DEFTRANSFORM
(bit-vector
;; FIXME: It must surely be possible to do better
;; than this. The problem is that a non-SIMPLE
;; BIT-VECTOR could be displaced to another, with a
;; non-zero offset -- so that significantly more
;; work needs to be done using the %RAW-BITS
;; approach. This will probably do for now.
(sxhash-recurse (copy-seq x) depthoid))
(t (logxor 191020317 (sxhash (array-rank x))))))
(character
(logxor 72185131
(sxhash (char-code x)))) ; through DEFTRANSFORM
;; general, inefficient case of NUMBER
(number (sxhash-number x))
(generic-function (sxhash-instance x))
(t 42))))
(sxhash-recurse x)))
;;;; the PSXHASH function
;;;; FIXME: This code does a lot of unnecessary full calls. It could be made
;;;; more efficient (in both time and space) by rewriting it along the lines
;;;; of the SXHASH code above.
;;; like SXHASH, but for EQUALP hashing instead of EQUAL hashing
(defun psxhash (key &optional (depthoid +max-hash-depthoid+))
(declare (optimize speed))
(declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
;; Note: You might think it would be cleaner to use the ordering given in the
;; table from Figure 5-13 in the EQUALP section of the ANSI specification
;; here. So did I, but that is a snare for the unwary! Nothing in the ANSI
;; spec says that HASH-TABLE can't be a STRUCTURE-OBJECT, and in fact our
;; HASH-TABLEs *are* STRUCTURE-OBJECTs, so we need to pick off the special
;; HASH-TABLE behavior before we fall through to the generic STRUCTURE-OBJECT
;; comparison behavior.
(typecase key
(array (array-psxhash key depthoid))
(hash-table (hash-table-psxhash key))
(structure-object (structure-object-psxhash key depthoid))
(cons (list-psxhash key depthoid))
(number (number-psxhash key))
(character (sxhash (char-upcase key)))
(t (sxhash key))))
(defun array-psxhash (key depthoid)
(declare (optimize speed))
(declare (type array key))
(declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
(typecase key
;; VECTORs have to be treated specially because ANSI specifies
;; that we must respect fill pointers.
(vector
(macrolet ((frob ()
'(let ((result 572539))
(declare (type fixnum result))
(mixf result (length key))
(dotimes (i (min depthoid (length key)))
(declare (type fixnum i))
(mixf result
(psxhash (aref key i)
(- depthoid 1 i))))
result)))
;; CMU can compile SIMPLE-ARRAY operations so much more efficiently
;; than the general case that it's probably worth picking off the
;; common special cases.
(typecase key
(simple-string
;;(format t "~&SIMPLE-STRING special case~%")
(frob))
(simple-vector
;;(format t "~&SIMPLE-VECTOR special case~%")
(frob))
(t (frob)))))
;; Any other array can be hashed by working with its underlying
;; one-dimensional physical representation.
(t
(let ((result 60828))
(declare (type fixnum result))
(dotimes (i (min depthoid (array-rank key)))
(mixf result (array-dimension key i)))
(dotimes (i (min depthoid (array-total-size key)))
(mixf result
(psxhash (row-major-aref key i)
(- depthoid 1 i))))
result))))
(defun structure-object-psxhash (key depthoid)
(declare (optimize speed))
(declare (type structure-object key))
(declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
(let* ((layout (%instance-layout key)) ; i.e. slot #0
(length (layout-length layout))
(classoid (layout-classoid layout))
(name (classoid-name classoid))
(result (mix (sxhash name) (the fixnum 79867))))
(declare (type fixnum result))
(dotimes (i (min depthoid (1- length)))
(declare (type fixnum i))
(let ((j (1+ i))) ; skipping slot #0, which is for LAYOUT
(declare (type fixnum j))
(mixf result
(psxhash (%instance-ref key j)
(1- depthoid)))))
result))
(defun list-psxhash (key depthoid)
(declare (optimize speed))
(declare (type list key))
(declare (type (integer 0 #.+max-hash-depthoid+) depthoid))
(cond ((null key)
(the fixnum 480929))
((zerop depthoid)
(the fixnum 779578))
(t
(mix (psxhash (car key) (1- depthoid))
(psxhash (cdr key) (1- depthoid))))))
(defun hash-table-psxhash (key)
(declare (optimize speed))
(declare (type hash-table key))
(let ((result 103924836))
(declare (type fixnum result))
(mixf result (hash-table-count key))
(mixf result (sxhash (hash-table-test key)))
result))
(defun number-psxhash (key)
(declare (optimize speed))
(declare (type number key))
(flet ((sxhash-double-float (val)
(declare (type double-float val))
;; FIXME: Check to make sure that the DEFTRANSFORM kicks in and the
;; resulting code works without consing. (In Debian cmucl 2.4.17,
;; it didn't.)
(sxhash val)))
(etypecase key
(integer (sxhash key))
(float (macrolet ((frob (type)
(let ((lo (coerce most-negative-fixnum type))
(hi (coerce most-positive-fixnum type)))
`(cond (;; This clause allows FIXNUM-sized integer
;; values to be handled without consing.
(<= ,lo key ,hi)
(multiple-value-bind (q r)
(floor (the (,type ,lo ,hi) key))
(if (zerop (the ,type r))
(sxhash q)
(sxhash-double-float
(coerce key 'double-float)))))
(t
(multiple-value-bind (q r) (floor key)
(if (zerop (the ,type r))
(sxhash q)
(sxhash-double-float
(coerce key 'double-float)))))))))
(etypecase key
(single-float (frob single-float))
(double-float (frob double-float))
#!+long-float
(long-float (error "LONG-FLOAT not currently supported")))))
(rational (if (and (<= most-negative-double-float
key
most-positive-double-float)
(= (coerce key 'double-float) key))
(sxhash-double-float (coerce key 'double-float))
(sxhash key)))
(complex (if (zerop (imagpart key))
(number-psxhash (realpart key))
(let ((result 330231))
(declare (type fixnum result))
(mixf result (number-psxhash (realpart key)))
(mixf result (number-psxhash (imagpart key)))
result))))))