From: Nikodemus S. <de...@us...> - 2008-05-17 11:02:42
|
Update of /cvsroot/sbcl/sbcl/src/code In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv19319/src/code Modified Files: cold-init.lisp early-full-eval.lisp eval.lisp time.lisp Log Message: 1.0.16.35: improved TIME output * Print measured times using fixed-width decimal output with the measured precision, instead of converting to floats for printing. * Report processor cycle counts on x86 and x86-64. ** Since Intel doesn't seem to consider it necessary to issue a CPUID both before and after RDTSC, maybe we don't need to do that either. ** New feature, :CYCLE-COUNTER, for platforms that implement SB-VM::%READ-CYCLE-COUNTER. * Instead of reporting %EVAL calls, report "interpreted forms", which means both %EVAL and SIMPLE-EVAL-IN-LEXENV. * Report "lambdas converted" for the compiler, not counting TL-XEPs. * Report CPU percentage (computed from real and run time.) * Report total run time separately. Condence run time output slightly by reporting total, user, and system on the same line. * Report non-GC time as well. * Condence output by omitting page faults, converted lambdas, and interpreted forms when they are zero. Index: cold-init.lisp =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/code/cold-init.lisp,v retrieving revision 1.76 retrieving revision 1.77 diff -u -d -r1.76 -r1.77 --- cold-init.lisp 19 Feb 2008 09:20:11 -0000 1.76 +++ cold-init.lisp 17 May 2008 11:02:28 -0000 1.77 @@ -108,8 +108,7 @@ sb!kernel::*gc-epoch* (cons nil nil)) ;; I'm not sure where eval is first called, so I put this first. - #!+sb-eval - (show-and-call sb!eval::!full-eval-cold-init) + (show-and-call !eval-cold-init) (show-and-call thread-init-or-reinit) (show-and-call !typecheckfuns-cold-init) Index: early-full-eval.lisp =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/code/early-full-eval.lisp,v retrieving revision 1.3 retrieving revision 1.4 diff -u -d -r1.3 -r1.4 --- early-full-eval.lisp 14 Sep 2006 21:10:44 -0000 1.3 +++ early-full-eval.lisp 17 May 2008 11:02:29 -0000 1.4 @@ -12,15 +12,8 @@ (in-package "SB!EVAL") (defparameter *eval-level* -1) -(defparameter *eval-calls* 0) (defparameter *eval-verbose* nil) -(defun !full-eval-cold-init () - (setf *eval-level* -1 - *eval-calls* 0 - *eval-verbose* nil - *evaluator-mode* :compile)) - ;; !defstruct-with-alternate-metaclass is unslammable and the ;; RECOMPILE restart doesn't work on it. This is the main reason why ;; this stuff is split out into its own file. Also, it lets the Index: eval.lisp =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/code/eval.lisp,v retrieving revision 1.40 retrieving revision 1.41 diff -u -d -r1.40 -r1.41 --- eval.lisp 10 Dec 2007 05:46:00 -0000 1.40 +++ eval.lisp 17 May 2008 11:02:29 -0000 1.41 @@ -11,6 +11,15 @@ (in-package "SB!IMPL") +(defparameter *eval-calls* 0) + +(defun !eval-cold-init () + (setf *eval-calls* 0 + *evaluator-mode* :compile) + #!+sb-eval + (setf sb!eval::*eval-level* -1 + sb!eval::*eval-verbose* nil)) + ;;; general case of EVAL (except in that it can't handle toplevel ;;; EVAL-WHEN magic properly): Delegate to #'COMPILE. (defun %simple-eval (expr lexenv) @@ -93,6 +102,7 @@ (defun simple-eval-in-lexenv (original-exp lexenv) (declare (optimize (safety 1))) ;; (aver (lexenv-simple-p lexenv)) + (incf *eval-calls*) (handler-bind ((sb!c:compiler-error (lambda (c) Index: time.lisp =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/code/time.lisp,v retrieving revision 1.35 retrieving revision 1.36 diff -u -d -r1.35 -r1.36 --- time.lisp 5 Apr 2007 12:24:30 -0000 1.35 +++ time.lisp 17 May 2008 11:02:29 -0000 1.36 @@ -265,7 +265,21 @@ (defmacro time (form) #!+sb-doc - "Execute FORM and print timing information on *TRACE-OUTPUT*." + "Execute FORM and print timing information on *TRACE-OUTPUT*. + +On some hardware platforms estimated processor cycle counts are +included in this output; this number is slightly inflated, since it +includes the pipeline involved in reading the cycle counter -- +executing \(TIME NIL) a few times will give you an idea of the +overhead, and its variance. The cycle counters are also per processor, +not per thread: if multiple threads are running on the same processor, +the reported counts will include cycles taken up by all threads +running on the processor where TIME was executed. Furthermore, if the +operating system migrates the thread to another processor between +reads of the cycle counter, the results will be completely bogus. +Finally, the counter is cycle counter, incremented by the hardware +even when the process is halted -- which is to say that cycles pass +normally during operations like SLEEP." `(%time (lambda () ,form))) ;;; Return all the data that we want TIME to report. @@ -273,6 +287,71 @@ (multiple-value-bind (user sys faults) (sb!sys:get-system-info) (values user sys faults (get-bytes-consed)))) + +(defun elapsed-cycles (h0 l0 h1 l1) + (declare (ignorable h0 l0 h1 l1)) + #!+cycle-counter + (+ (ash (- h1 h0) 32) + (- l1 l0)) + #!-cycle-counter + nil) +(declaim (inline read-cycle-counter)) +(defun read-cycle-counter () + #!+cycle-counter + (sb!vm::%read-cycle-counter) + #!-cycle-counter + (values 0 0)) + +;;; This is so that we don't have to worry about the vagaries of +;;; floating point printing, or about conversions to floats dropping +;;; or introducing decimals, which are liable to imply wrong precision. +(defun format-microseconds (stream usec &optional colonp atp) + (declare (ignore colonp)) + (%format-decimal stream usec 6) + (unless atp + (write-string " seconds" stream))) + +(defun format-milliseconds (stream usec &optional colonp atp) + (declare (ignore colonp)) + (%format-decimal stream usec 3) + (unless atp + (write-string " seconds" stream))) + +(defun %format-decimal (stream number power) + (declare (stream stream) + (integer number power)) + (when (minusp number) + (write-char #\- stream) + (setf number (- number))) + (let ((scale (expt 10 power))) + (flet ((%fraction (fraction) + (let ((scaled (* 10 fraction))) + (loop while (< scaled scale) + do (write-char #\0 stream) + (setf scaled (* scaled 10)))) + (format stream "~D" fraction)) + (%zeroes () + (let ((scaled (/ scale 10))) + (write-char #\0 stream) + (loop while (> scaled 1) + do (write-char #\0 stream) + (setf scaled (/ scaled 10)))))) + (cond ((zerop number) + (write-string "0." stream) + (%zeroes)) + ((< number scale) + (write-string "0." stream) + (%fraction number)) + ((= number scale) + (write-string "1." stream) + (%zeroes)) + ((> number scale) + (multiple-value-bind (whole fraction) (floor number scale) + (format stream "~D." whole) + (%fraction fraction)))))) + + nil) + ;;; The guts of the TIME macro. Compute overheads, run the (compiled) ;;; function, report the times. (defun %time (fun) @@ -316,35 +395,53 @@ (old-run-utime old-run-stime old-page-faults old-bytes-consed) (time-get-sys-info)) (setq old-real-time (get-internal-real-time)) - (let ((start-gc-run-time *gc-run-time*) - #!+sb-eval (sb!eval:*eval-calls* 0)) - (declare #!+sb-eval (special sb!eval:*eval-calls*)) - (multiple-value-prog1 - ;; Execute the form and return its values. - (funcall fun) - (multiple-value-setq - (new-run-utime new-run-stime new-page-faults new-bytes-consed) - (time-get-sys-info)) - (setq new-real-time (- (get-internal-real-time) real-time-overhead)) - (let ((gc-run-time (max (- *gc-run-time* start-gc-run-time) 0))) - (format *trace-output* - "~&Evaluation took:~% ~ - ~S second~:P of real time~% ~ - ~S second~:P of user run time~% ~ - ~S second~:P of system run time~% ~ - ~@[[Run times include ~S second~:P GC run time.]~% ~]~ - ~@[~S call~:P to %EVAL~% ~]~ - ~S page fault~:P and~% ~ - ~:D bytes consed.~%" - (max (/ (- new-real-time old-real-time) - (float sb!xc:internal-time-units-per-second)) - 0.0) - (max (/ (- new-run-utime old-run-utime) 1000000.0) 0.0) - (max (/ (- new-run-stime old-run-stime) 1000000.0) 0.0) - (unless (zerop gc-run-time) - (/ (float gc-run-time) - (float sb!xc:internal-time-units-per-second))) - #!+sb-eval sb!eval:*eval-calls* #!-sb-eval nil - (max (- new-page-faults old-page-faults) 0) - (max (- new-bytes-consed old-bytes-consed) 0))))))) - + (let ((start-gc-internal-run-time *gc-run-time*) + (*eval-calls* 0) + (sb!c::*lambda-conversions* 0)) + (declare (special *eval-calls* sb!c::*lambda-conversions*)) + (multiple-value-bind (h0 l0) (read-cycle-counter) + (multiple-value-prog1 + ;; Execute the form and return its values. + (funcall fun) + (multiple-value-bind (h1 l1) (read-cycle-counter) + (let ((stop-gc-internal-run-time *gc-run-time*)) + (multiple-value-setq + (new-run-utime new-run-stime new-page-faults new-bytes-consed) + (time-get-sys-info)) + (setq new-real-time (- (get-internal-real-time) real-time-overhead)) + (let* ((gc-internal-run-time (max (- stop-gc-internal-run-time start-gc-internal-run-time) 0)) + (real-time (max (- new-real-time old-real-time) 0)) + (user-run-time (max (- new-run-utime old-run-utime) 0)) + (system-run-time (max (- new-run-stime old-run-stime) 0)) + (total-run-time (+ user-run-time system-run-time)) + (cycles (elapsed-cycles h0 l0 h1 l1)) + (page-faults (max (- new-page-faults old-page-faults) 0))) + (format *trace-output* + "~&Evaluation took:~%~ + ~@< ~@;~/sb-impl::format-milliseconds/ of real time~%~ + ~/sb-impl::format-microseconds/ of total run time ~ + (~@/sb-impl::format-microseconds/ user, ~@/sb-impl::format-microseconds/ system)~%~ + ~[[ Run times consist of ~/sb-impl::format-milliseconds/ GC time, ~ + and ~/sb-impl::format-milliseconds/ non-GC time. ]~%~;~2*~]~ + ~,2F% CPU~%~ + ~@[~:D form~:P interpreted~%~]~ + ~@[~:D lambda~:P converted~%~]~ + ~@[~:D processor cycles~%~]~ + ~@[~:D page fault~:P~%~]~ + ~:D bytes consed~:>~%" + real-time + total-run-time + user-run-time + system-run-time + (if (zerop gc-internal-run-time) 1 0) + gc-internal-run-time + ;; Round up so we don't mislead by saying 0.0 seconds of non-GC time... + (- (ceiling total-run-time 1000) gc-internal-run-time) + (if (zerop real-time) + 100.0 + (float (* 100 (/ (round total-run-time 1000) real-time)))) + (unless (zerop *eval-calls*) *eval-calls*) + (unless (zerop sb!c::*lambda-conversions*) sb!c::*lambda-conversions*) + cycles + (unless (zerop page-faults) page-faults) + (max (- new-bytes-consed old-bytes-consed) 0)))))))))) |