;; only if the command exists, obviously, would you heed this advice
;; syntax: taskset -p 10 # substitute any 1 random bit in the mask for "10"
(format t "~&;; Please ensure that 'taskset' was used to bind this Lisp (pid ~D) to one CPU~%"
(sb-unix:unix-getpid))
(in-package sb-vm)
(defknown ultra-fast-fill (sb-sys:system-area-pointer t fixnum) (values))
(define-vop (ultra-fast-fill)
(:translate ultra-fast-fill)
(:args (sap :scs (sap-reg) :target rdi)
(item :scs (descriptor-reg) :target rax)
(count :scs (unsigned-reg) :target rcx))
(:arg-types system-area-pointer t positive-fixnum)
(:temporary (:sc unsigned-reg :offset rdi-offset :from (:argument 0)) rdi)
(:temporary (:sc unsigned-reg :offset rax-offset :from (:argument 1)) rax)
(:temporary (:sc unsigned-reg :offset rcx-offset :from (:argument 2)) rcx)
(:policy :fast-safe)
(:generator 3
(move rdi sap)
(move rax item)
(move rcx count)
(inst rep)
(inst stos rax)
;; Fence, because Intel allows writes from REP STOSQ to be
;; seen out-of-order by other CPUs.
(inst sfence)))
;; Given TIMINGS, a 2D array indexed by procedure number and trial number,
;; for each procedure, sort the times and discard the lowest 2 and highest 2.
;; The sum of the inside values is the total for that procedure.
#| Example:
* (best-of-timings #2a((0 10000 80 -1 5 10) (2 2 2 2 2 2) (0 1 3000 4000 6000 5000)) t)
Summarizing over 3 procedures 6 trials
Procedure 0 sorted results are #(-1 0 5 10 80 10000), sum excluding outliers is 15
Procedure 1 sorted results are #(2 2 2 2 2 2), sum excluding outliers is 4
Procedure 2 sorted results are #(0 1 3000 4000 5000 6000), sum excluding outliers is 7000
((8 37.5) (2 0.0) (3500 14.285715))
|#
(defun best-of-timings (timings &optional detail)
(let* ((n-procedures (array-dimension timings 0))
(n-trials (array-dimension timings 1))
(output (make-list n-procedures)))
(when detail
(format t "~& Summarizing over ~D procedures ~D trials~%"
n-procedures n-trials))
(dotimes (proc-number n-procedures output)
(let ((trials
(make-array n-trials
:displaced-to timings
:displaced-index-offset (* proc-number n-trials))))
(sort trials #'<)
(let* ((points (subseq trials 2 (- n-trials 2)))
(sum (reduce #'+ points))
(avg (ceiling sum (length points)))
(spread (* 100 (/ (float (max (- (reduce #'max points) avg)
(- avg (reduce #'min points))))
avg))))
(when detail
(format t " Procedure ~D sorted results are ~S, sum excluding outliers is ~D~%"
proc-number trials sum))
(setf (elt output proc-number) (list avg spread)))))))
(defun try-ways (procedures &key (n-trials 20))
(let ((n-do-overs 0))
(loop
(let ((timings (make-array (list (length procedures) n-trials)
:initial-element nil)))
(dotimes (trial-number (the fixnum n-trials))
(loop for proc in procedures
for proc-index from 0
do (multiple-value-bind (h0 l0)
(sb-impl::read-cycle-counter)
(funcall proc)
(multiple-value-bind (h1 l1)
(sb-impl::read-cycle-counter)
(setf (aref timings proc-index trial-number)
(sb-impl::elapsed-cycles h0 l0 h1 l1))))))
(let* ((timings (best-of-timings timings))
(best (reduce #'min timings :key #'car))
(worst (reduce #'max timings :key #'car))
(best-index (position best timings :key #'car)))
(format t "~& Timings:~:{ [~f ± ~3f%]~} - best is ~:R way (ratio=~f)~%"
timings (1+ best-index)
(/ (float worst) best))
;; if no test had more than 5% measurement error, great
(cond ((<= (reduce #'max (mapcar #'second timings)) 5)
(return))
((<= (incf n-do-overs) 4) ; repeat up to 4 more times
(format t "~& Repeating test due to excess noise~%"))
(t
(format t "~& >>> TEST IS UNSTABLE~%")
(return))))))))
;; N-PASSES is make these "slow" enough to eliminate measurement noise
(defun fillway1 (n-passes v value)
(declare (optimize speed (safety 0) (sb-c::insert-array-bounds-checks 0))
(simple-vector v))
(loop repeat (the fixnum n-passes) do (fill v value))
v)
(defun fillway2 (n-passes v value)
(declare (optimize speed (safety 0)))
(loop repeat (the fixnum n-passes)
do (ultra-fast-fill (sb-sys:vector-sap v) value (length v)))
v)
(defun testfill ()
(dolist (size '(1 2 3 4 5 6 7 10 15 20 50 80 100 200 500 800
1000 2000 4000 8000 10000 20000 30000 50000 100000))
(format t "Testing size ~D~%" size)
(let ((v (make-array size))
(n-fills (floor 1000000 size)))
(sb-int:dx-flet ((timeway1 () (fillway1 n-fills v 'foo))
(timeway2 () (fillway2 n-fills v 'foo)))
(try-ways (list #'timeway1 #'timeway2))))))
#| Sample run:
* (testfill)
Testing size 1
Timings: [5966735.0 ± 8.4%] [26052264.0 ± .38%] - best is first way (ratio=4.366251)
Repeating test due to excess noise
Timings: [5754482.0 ± 13.%] [26013080.0 ± .09%] - best is first way (ratio=4.52049)
Repeating test due to excess noise
Timings: [5997568.0 ± .66%] [26012900.0 ± .07%] - best is first way (ratio=4.337241)
Testing size 2
Timings: [3954817.0 ± 9.7%] [13004015.0 ± .11%] - best is first way (ratio=3.2881458)
Repeating test due to excess noise
Timings: [3879478.0 ± 9.8%] [13008346.0 ± .08%] - best is first way (ratio=3.3531177)
Repeating test due to excess noise
Timings: [3936134.0 ± 8.2%] [13004956.0 ± .13%] - best is first way (ratio=3.3039923)
Repeating test due to excess noise
Timings: [3906457.0 ± 10.%] [13011486.0 ± .14%] - best is first way (ratio=3.3307638)
Repeating test due to excess noise
Timings: [3917181.0 ± 11.%] [13004452.0 ± .18%] - best is first way (ratio=3.3198497)
>>> TEST IS UNSTABLE
Testing size 3
Timings: [3333519.0 ± .01%] [8670553.0 ± .25%] - best is first way (ratio=2.601021)
Testing size 4
Timings: [3325654.0 ± 23.%] [6501932.0 ± .18%] - best is first way (ratio=1.9550837)
Repeating test due to excess noise
Timings: [3104118.0 ± 32.%] [6502665.0 ± .12%] - best is first way (ratio=2.094851)
Repeating test due to excess noise
Timings: [3163138.0 ± 29.%] [6503098.0 ± .22%] - best is first way (ratio=2.0559008)
Repeating test due to excess noise
Timings: [3237883.0 ± 26.%] [6502575.0 ± .17%] - best is first way (ratio=2.0082798)
Repeating test due to excess noise
Timings: [3161263.0 ± 29.%] [6501499.0 ± .16%] - best is first way (ratio=2.0566144)
>>> TEST IS UNSTABLE
Testing size 5
Timings: [3666823.0 ± 0.0%] [5203167.0 ± .17%] - best is first way (ratio=1.418985)
Testing size 6
Timings: [3444604.0 ± .01%] [4335598.0 ± .22%] - best is first way (ratio=1.2586637)
Testing size 7
Timings: [3428768.0 ± .02%] [3715849.0 ± .29%] - best is first way (ratio=1.0837271)
Testing size 10
Timings: [2952966.0 ± .56%] [2600353.0 ± .11%] - best is second way (ratio=1.135602)
Testing size 15
Timings: [2668420.0 ± .33%] [2333486.0 ± .01%] - best is second way (ratio=1.1435337)
Testing size 20
Timings: [2751940.0 ± .32%] [2000147.0 ± 0.0%] - best is second way (ratio=1.3758689)
Testing size 50
Timings: [2300155.0 ± .01%] [1400589.0 ± .44%] - best is second way (ratio=1.6422769)
Testing size 80
Timings: [3262643.0 ± 0.0%] [1064054.0 ± 1.0%] - best is second way (ratio=3.0662382)
Testing size 100
Timings: [3212221.0 ± .36%] [900145.0 ± .01%] - best is second way (ratio=3.5685594)
Testing size 200
Timings: [3106761.0 ± .27%] [730149.0 ± .01%] - best is second way (ratio=4.2549686)
Testing size 500
Timings: [3043406.0 ± 0.3%] [580145.0 ± .02%] - best is second way (ratio=5.24594)
Testing size 800
Timings: [3027824.0 ± .37%] [561413.0 ± .05%] - best is second way (ratio=5.3932204)
Testing size 1000
Timings: [3021154.0 ± .01%] [546143.0 ± .01%] - best is second way (ratio=5.5318003)
Testing size 2000
Timings: [3011098.0 ± .23%] [524640.0 ± .01%] - best is second way (ratio=5.7393603)
Testing size 4000
Timings: [3006296.0 ± .44%] [512506.0 ± .01%] - best is second way (ratio=5.865875)
Testing size 8000
Timings: [3006412.0 ± .94%] [819532.0 ± .01%] - best is second way (ratio=3.6684499)
Testing size 10000
Timings: [3004545.0 ± .75%] [818184.0 ± .02%] - best is second way (ratio=3.672212)
Testing size 20000
Timings: [3002762.0 ± .46%] [815265.0 ± .09%] - best is second way (ratio=3.683173)
Testing size 30000
Timings: [2977921.0 ± 1.1%] [839053.0 ± .88%] - best is second way (ratio=3.5491452)
Testing size 50000
Timings: [3003996.0 ± .65%] [1004756.0 ± .26%] - best is second way (ratio=2.9897766)
Testing size 100000
Timings: [3004692.0 ± .63%] [1140418.0 ± .81%] - best is second way (ratio=2.6347287)
NIL
|#