From: Douglas K. <sn...@us...> - 2015-01-07 16:05:47
|
The branch "master" has been updated in SBCL: via b048faf53e15c59cbd0d2066b5fb50808cc43efe (commit) from 0852224e20a6b596e030c7db3f1b05eba4f83072 (commit) - Log ----------------------------------------------------------------- commit b048faf53e15c59cbd0d2066b5fb50808cc43efe Author: Douglas Katzman <do...@go...> Date: Wed Jan 7 11:03:36 2015 -0500 x86-64: Improve disassembler's DETERMINE-OPCODE-BOUNDS --- src/compiler/target-disassem.lisp | 11 ++++++++++- src/compiler/x86-64/target-insts.lisp | 29 ++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/compiler/target-disassem.lisp b/src/compiler/target-disassem.lisp index 4e62cc1..f83ed09 100644 --- a/src/compiler/target-disassem.lisp +++ b/src/compiler/target-disassem.lisp @@ -264,6 +264,9 @@ ;; Length of the memory range excluding any trailing untagged data. ;; Defaults to 'length' but could be shorter. (opcodes-length 0 :type disassem-length) + ;; The number of pad bytes after opcodes-length that should be dumped + ;; as raw bytes prior to displaying raw bytes that are genuinely data. + (opcode-trailer-length 0 :type disassem-length) (virtual-location 0 :type address) (storage-info nil :type (or null storage-info)) ;; For backends which support unboxed constants within the segment, @@ -1682,7 +1685,13 @@ (defstruct code-constant-raw value) (def!method print-object ((self code-constant-raw) stream) - (format stream "#x~8,'0x" (code-constant-raw-value self))) + ;; A raw code constant is never a Lisp object, so if it is a cons, + ;; then it's the address of raw data - mnemonic is that (x) is like [x]. + ;; Ideally we would want to show it as [Label] and then label the + ;; corresponding unboxed data. 
+ (if (listp (code-constant-raw-value self)) + (format stream "[#x~x]" (car (code-constant-raw-value self))) + (format stream "#x~8,'0x" (code-constant-raw-value self)))) (defun get-code-constant-absolute (addr dstate &optional width) (declare (type address addr)) diff --git a/src/compiler/x86-64/target-insts.lisp b/src/compiler/x86-64/target-insts.lisp index 086ea17..b728f87 100644 --- a/src/compiler/x86-64/target-insts.lisp +++ b/src/compiler/x86-64/target-insts.lisp @@ -28,6 +28,11 @@ ;;; register implies a size. ;;; (defun print-mem-ref (mode value width stream dstate) + ;; :COMPUTE is used for the LEA instruction - it informs this function + ;; that the address is not a memory reference below which is confined + ;; the disassembly - the heuristic for detecting the start of unboxed data. + ;; LEA is sometimes used to compute the start of a local function for + ;; allocate-closures, and it points to valid instructions, not data. (declare (type (member :ref :sized-ref :compute) mode) (type list value) (type (member nil :byte :word :dword :qword) width) @@ -77,16 +82,17 @@ 1 (sb!disassem::note-code-constant-absolute addr dstate width)) (sb!disassem:maybe-note-assembler-routine - addr nil dstate)))))) + addr nil dstate) + ;; lacking anything better, show the absolute address + (sb!disassem:note (format nil "[#x~x]" addr) dstate)))))) (firstp - (progn (sb!disassem:princ16 offset stream) (or (minusp offset) (nth-value 1 (sb!disassem::note-code-constant-absolute offset dstate)) (sb!disassem:maybe-note-assembler-routine offset nil - dstate)))) + dstate))) (t (princ offset stream))))))) (write-char #\] stream) @@ -132,6 +138,11 @@ ;; For each reference, record its length so that it will subsequently ;; display the proper number of bytes. +;; FIXME: I really think it ought to be possible to cut down to 2 passes +;; from the current three passes that are made over each segment +;; (one to determine opcode bounds, one to label, one to display). 
+;; If, on the labeling pass, we just stop looking at bytes when encountering +;; an address that intersects a known unboxed data ref, then we're done. (defun determine-opcode-bounds (seglist dstate) (flet ((mem-ref (displacement size more-segments) (let ((seg (dstate-segment dstate)) @@ -175,7 +186,9 @@ ;; It's probably zero-fill. "ADD [RAX],AL" encodes as {0,0} ;; and is the most likely reason to chop one byte and stop. (unless (< last-inst-end-ofs (seg-opcodes-length seg)) - (setf (seg-opcodes-length seg) last-inst-start-ofs))))) + (let ((n-skip (- (seg-opcodes-length seg) last-inst-start-ofs))) + (setf (seg-opcodes-length seg) last-inst-start-ofs + (seg-opcode-trailer-length seg) n-skip)))))) (setf (dstate-get-prop dstate :rip-relative-mem-ref-hook) nil)) (defun disassemble-unboxed-data (segment stream dstate) @@ -192,8 +205,14 @@ (incf (dstate-cur-offs dstate) nbytes))) ;; Demarcate just before the first byte of 0-fill (if any) rather than at ;; the first location which was referenced as data, because it looks - ;; nicer to have no incomplete instructions prior to that. + ;; nicer to have no incomplete instructions prior to that, + ;; though some may spuriously decode as a legal sequence of zero bytes. + ;; e.g. on x86-64 the shortest valid sequence is two zeros, so we'll never + ;; decode one zero as an instruction, but will instead show it as a pad. (format stream "~&; Unboxed data:") + (let ((n-skip (seg-opcode-trailer-length segment))) + (when (plusp n-skip) + (hexdump n-skip))) ;; The way to guarantee we have the exact 'data-start' is to track refs ;; from all disassembly segments to all others. This is not trivial, ;; so not implemented. ----------------------------------------------------------------------- hooks/post-receive -- SBCL |