[5d1093]: src / runtime / x86-64-assem.S Maximize Restore History

Download this file

x86-64-assem.S    568 lines (510 with data), 14.8 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
/*
* very-low-level utilities for runtime support
*/
/*
* This software is part of the SBCL system. See the README file for
* more information.
*
* This software is derived from the CMU CL system, which was
* written at Carnegie Mellon University and released into the
* public domain. The software is in the public domain and is
* provided with absolutely no warranty. See the COPYING and CREDITS
* files for more information.
*/
#define LANGUAGE_ASSEMBLY
#include "genesis/config.h"
#include "validate.h"
#include "sbcl.h"
#include "genesis/closure.h"
#include "genesis/funcallable-instance.h"
#include "genesis/fdefn.h"
#include "genesis/static-symbols.h"
#include "genesis/symbol.h"
#include "genesis/thread.h"
/* Minimize conditionalization for different OS naming schemes. */
#if defined __linux__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __NetBSD__ || defined __sun || defined _WIN64
#define GNAME(var) var
#else
#define GNAME(var) _##var
#endif
/* Get the right type of alignment. Linux, FreeBSD and OpenBSD
* want alignment in bytes. */
#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined __NetBSD__ || defined(__sun) || defined _WIN64
#define align_4byte 4
#define align_8byte 8
#define align_16byte 16
#define align_32byte 32
#define align_page 32768
#else
#define align_4byte 2
#define align_8byte 3
#define align_16byte 4
#define align_page 15
#endif
/*
* The assembler used for win32 doesn't like .type or .size directives,
* so we want to conditionally kill them out. So let's wrap them in macros
* that are defined to be no-ops on win32. Hopefully this still works on
* other platforms.
*/
#if !defined(LISP_FEATURE_WIN32) && !defined(LISP_FEATURE_DARWIN)
#define TYPE(name) .type name,@function
#define SIZE(name) .size name,.-name
#define DOLLAR(name) $(name)
#else
#define TYPE(name)
#define SIZE(name)
#endif
/*
* x86/darwin (as of MacOS X 10.4.5) doesn't reliably fire signal
* handlers (SIGTRAP or Mach exception handlers) for 0xCC, so we have
* to use ud2 instead. ud2 is an undefined opcode, #x0b0f, or
* 0F 0B in low-endian notation, that causes SIGILL to fire. We check
* for this instruction in the SIGILL handler and if we see it, we
* advance the EIP by two bytes to skip over ud2 instruction and
* call sigtrap_handler. */
#if defined(LISP_FEATURE_UD2_BREAKPOINTS)
#define TRAP ud2
#else
#define TRAP int3
#endif
/*
* More Apple assembler hacks
*/
#if defined(LISP_FEATURE_DARWIN)
/* global symbol x86-64 sym(%rip) hack:*/
#define GSYM(name) name(%rip)
#else
#define GSYM(name) $name
#endif
.text
.globl GNAME(all_threads)
/* From lower to higher-numbered addresses, the stack contains
* return address, arg 0, arg 1, arg 2 ...
* rax contains the address of the function to call
* Lisp expects return value in rax, which is already consistent with C
* XXXX correct floating point handling is unimplemented so far
* Based on comments cleaned from x86-assem.S, we believe that
* Lisp is expecting us to preserve rsi, rdi, rsp (no idea about r8-15)
*/
.text
.align align_16byte,0x90
.globl GNAME(call_into_c)
TYPE(GNAME(call_into_c))
/* call_into_c: transfer control from Lisp to a C function.
 * On entry %rax holds the address of the C function; the C arguments
 * sit on the stack above the return address (16(%rbp)..56(%rbp) once
 * the frame is established).  The C result is returned in %rax, which
 * matches what Lisp expects (see the comment block above).
 * %rsi/%rdi are saved and restored because Lisp expects them
 * preserved; they double as C argument registers below. */
GNAME(call_into_c):
push %rbp # Save old frame pointer.
mov %rsp,%rbp # Establish new frame.
push %rsi # Save Lisp's %rsi/%rdi before they are
push %rdi # overwritten with C arguments below.
/* Load the six System V AMD64 integer argument registers from the
 * stacked arguments.  BUG FIX: 48(%rbp) was previously loaded into
 * %rcx (clobbering arg 3 just loaded from 40(%rbp)) and %r8/%r9 were
 * taken from 56/64(%rbp), skewing arguments 4 and 5 by one slot. */
mov 16(%rbp),%rdi # C arg 0
mov 24(%rbp),%rsi # C arg 1
mov 32(%rbp),%rdx # C arg 2
mov 40(%rbp),%rcx # C arg 3
mov 48(%rbp),%r8 # C arg 4
mov 56(%rbp),%r9 # C arg 5
call *%rax
mov -8(%rbp),%rsi # Restore the %rsi/%rdi values saved above;
mov -16(%rbp),%rdi # they were pushed but never reloaded before.
mov %rbp,%rsp # Discard frame (also drops the two save slots).
pop %rbp
ret
SIZE(GNAME(call_into_c))
.text
.globl GNAME(call_into_lisp_first_time)
TYPE(GNAME(call_into_lisp_first_time))
/* We don't worry too much about saving registers
 * here, because we never expect to return from the initial call to lisp
 * anyway */
/* Entry point for the very first call into Lisp: loads the head of
 * all_threads (presumably the initial thread — confirm against
 * thread.c) and switches %rsp onto that thread's control stack, then
 * joins the common call_into_lisp path at Lstack. */
.align align_16byte,0x90
GNAME(call_into_lisp_first_time):
push %rbp # Save old frame pointer.
mov %rsp,%rbp # Establish new frame.
#if defined(LISP_FEATURE_DARWIN)
movq GSYM(GNAME(all_threads)),%rax
#else
movq GNAME(all_threads),%rax
#endif
/* %rsp = end (top) of the thread's control stack. */
mov THREAD_CONTROL_STACK_END_OFFSET(%rax) ,%rsp
jmp Lstack
.text
.globl GNAME(call_into_lisp)
TYPE(GNAME(call_into_lisp))
/*
* amd64 calling convention: C expects that
* arguments go in rdi rsi rdx rcx r8 r9
* return values in rax rdx
* callee saves rbp rbx r12-15 if it uses them
*/
/* call_into_lisp(fun, args, nargs):
 * %rdi = function, %rsi = argument vector, %rdx = arg count.
 * Saves the C callee-saved registers, copies up to three arguments
 * into the Lisp argument registers, builds a Lisp call frame and
 * calls the function's entry point.  Returns the (single) Lisp value
 * in %rax. */
#ifdef LISP_FEATURE_WIN32
# define SUPPORT_FOMIT_FRAME_POINTER
#endif
.align align_16byte,0x90
GNAME(call_into_lisp):
#ifdef SUPPORT_FOMIT_FRAME_POINTER
mov %rbp,%rax # stash caller's %rbp; may not be a real frame pointer
#endif
push %rbp # Save old frame pointer.
mov %rsp,%rbp # Establish new frame.
Lstack:
#ifdef SUPPORT_FOMIT_FRAME_POINTER
/* If called through call_into_lisp_first_time, %r15 becomes invalid
* here, but we will not return in that case. */
push %r15
mov %rax,%r15
#endif
/* FIXME x86 saves FPU state here */
push %rbx # these regs are callee-saved according to C
push %r12 # so must be preserved and restored when
push %r13 # the lisp function returns
push %r14 #
push %r15 #
mov %rsp,%rbx # remember current stack
push %rbx # Save entry stack on (maybe) new stack.
push %rdi # args from C
push %rsi #
push %rdx #
#ifdef LISP_FEATURE_SB_THREAD
# ifdef SUPPORT_FOMIT_FRAME_POINTER
mov (%rbp),%rcx
sub $32,%rsp
call GNAME(carry_frame_pointer)
add $32,%rsp
mov %rax,(%rbp)
# endif
#ifdef LISP_FEATURE_GCC_TLS
movq %fs:0, %rax
movq GNAME(current_thread)@TPOFF(%rax), %r12
#else
#ifdef LISP_FEATURE_DARWIN
mov GSYM(GNAME(specials)),%rdi
#else
mov specials,%rdi
#endif
call GNAME(pthread_getspecific)
mov %rax,%r12 # %r12 = current thread's TLS value looked up above
#endif
#endif
pop %rcx # num args
pop %rbx # arg vector
pop %rax # function ptr/lexenv
xor %rdx,%rdx # clear any descriptor registers
xor %rdi,%rdi # that we can't be sure we'll
xor %rsi,%rsi # initialise properly. XX do r8-r15 too?
cmp $0,%rcx
# It's tempting to think 'cmov' for these assignments, but don't:
# cmov does a memory cycle whether or not it moves, succumbing to
# a classic buffer overrun bug if argv[] is "badly" placed.
je Ldone
mov 0(%rbx),%rdx # arg0
cmp $1,%rcx
je Ldone
mov 8(%rbx),%rdi # arg1
cmp $2,%rcx
je Ldone
mov 16(%rbx),%rsi # arg2
Ldone:
shl $(N_FIXNUM_TAG_BITS),%rcx # (fixnumize num-args)
/* Registers rax, rcx, rdx, rdi, and rsi are now live. */
xor %rbx,%rbx # available
/* Alloc new frame. */
push %rbp # Dummy for return address
push %rbp # fp in save location S1
mov %rsp,%rbp # The current sp marks start of new frame.
sub $8,%rsp # Ensure 3 slots are allocated, two above.
Lcall:
call *CLOSURE_FUN_OFFSET(%rax)
/* If the function returned multiple values, it will return to
this point. Lose them */
jnc LsingleValue
mov %rbx, %rsp
LsingleValue:
/* Restore the stack, in case there was a stack change. */
pop %rsp # c-sp
/* Restore C regs */
pop %r15
pop %r14
pop %r13
pop %r12
pop %rbx
/* FIXME Restore the NPX state. */
mov %rdx,%rax # c-val
#ifdef SUPPORT_FOMIT_FRAME_POINTER
mov %r15,%rbp # orig rbp
pop %r15 # orig r15
add $8,%rsp # no need for saved (overridden) rbp
#else
leave
#endif
ret
SIZE(GNAME(call_into_lisp))
/* support for saving and restoring the NPX state from C */
.text
.globl GNAME(fpu_save)
TYPE(GNAME(fpu_save))
.align align_16byte,0x90
/* void fpu_save(void *dest): store the x87 FPU state at dest.
 * Note that fnsave also reinitializes the FPU as a side effect. */
GNAME(fpu_save):
fnsave (%rdi) # Save the NPX state. (resets NPX)
ret
SIZE(GNAME(fpu_save))
.globl GNAME(fpu_restore)
TYPE(GNAME(fpu_restore))
.align align_16byte,0x90
/* void fpu_restore(void *src): reload the x87 FPU state saved
 * earlier by fpu_save. */
GNAME(fpu_restore):
frstor (%rdi) # Restore the NPX state.
ret
SIZE(GNAME(fpu_restore))
/*
* the undefined-function trampoline
*/
.text
.align align_16byte,0x90
.globl GNAME(undefined_tramp)
TYPE(GNAME(undefined_tramp))
/* Entered when an undefined function is called: raises an
 * UNDEFINED-FUN-ERROR trap for the Lisp error system to decode.
 * The .byte data after TRAP encodes the trap's operands. */
GNAME(undefined_tramp):
pop 8(%rbp) # Save return PC for backtrace.
TRAP
.byte trap_Error
.byte 2 # two operand bytes follow
.byte UNDEFINED_FUN_ERROR
.byte sc_DescriptorReg # eax in the Descriptor-reg SC
ret
SIZE(GNAME(undefined_tramp))
.text
.align align_16byte,0x90
.globl GNAME(undefined_alien_function)
TYPE(GNAME(undefined_alien_function))
/* Entered when an undefined alien (foreign) function is called:
 * raises an UNDEFINED-ALIEN-FUN-ERROR trap, analogous to
 * undefined_tramp above. */
GNAME(undefined_alien_function):
pop 8(%rbp) # Save return PC for backtrace.
TRAP
.byte trap_Error
.byte 4 # four operand bytes follow
.byte UNDEFINED_ALIEN_FUN_ERROR
/* Encode RBX
FIXME: make independent of the encoding changes. */
.byte 0xFE
.byte 0x9F
.byte 0x01
ret
SIZE(GNAME(undefined_alien_function))
/* KLUDGE: FIND-ESCAPED-FRAME (SYS:SRC;CODE;DEBUG-INT.LISP) needs
 * to know the name of the function immediately following the
 * undefined-function trampoline. */
/* Our call-site does not take care of caller-saved xmm registers, so it
 * falls to us to spill them before hopping into C.
 *
 * We simply save all of them.
 *
 * (But for the sake of completeness, here is my understanding of the specs:)
 * System V Microsoft
 * argument passing xmm0-7 xmm0-3
 * caller-saved xmm8-15 xmm4-5
 * callee-saved - xmm6-15
 *
 * --DFL */
#define stkxmmsave(n) movaps %xmm##n, n*16(%rsp)
#define stkxmmload(n) movaps n*16(%rsp), %xmm##n
#define map_all_xmm(op) \
op(0);op(1);op(2);op(3);op(4);op(5);op(6);op(7); \
op(8);op(9);op(10);op(11);op(12);op(13);op(14);op(15);
.text
.align align_16byte,0x90
.globl GNAME(alloc_tramp)
TYPE(GNAME(alloc_tramp))
/* Trampoline into the C allocator.  The byte count is passed in the
 * stack slot at 16(%rbp) (pushed by the caller) and the allocated
 * object is returned in the same slot.  All C-volatile GPRs and all
 * 16 xmm registers are saved around the call into alloc(). */
GNAME(alloc_tramp):
cld
push %rbp # Save old frame pointer.
mov %rsp,%rbp # Establish new frame.
and $-32,%rsp # 32-byte-align the xmm save area
sub $16*16,%rsp # room for all 16 xmm registers
map_all_xmm(stkxmmsave)
push %rax
push %rcx
push %rdx
push %rsi
push %rdi
push %r8
push %r9
push %r10
push %r11
push %r11 # pushed twice on purpose: the 10th 8-byte push keeps %rsp 16-byte aligned at the call below
mov 16(%rbp),%rdi # C argument: number of bytes to allocate
call GNAME(alloc)
mov %rax,16(%rbp) # return the result in the argument slot
pop %r11 # first pop discards the alignment duplicate
pop %r11
pop %r10
pop %r9
pop %r8
pop %rdi
pop %rsi
pop %rdx
pop %rcx
pop %rax
map_all_xmm(stkxmmload)
mov %rbp,%rsp
pop %rbp
ret
SIZE(GNAME(alloc_tramp))
/*
* the closure trampoline
*/
.text
.align align_16byte,0x90
.globl GNAME(closure_tramp)
TYPE(GNAME(closure_tramp))
/* Entered with %rax pointing at an fdefn: fetch its function object
 * and jump to that function's entry point. */
GNAME(closure_tramp):
mov FDEFN_FUN_OFFSET(%rax),%rax
/* FIXME: The '*' after "jmp" in the next line is from PVE's
 * patch posted to the CMU CL mailing list Oct 6, 1999. It looks
 * reasonable, and it certainly seems as though if CMU CL needs it,
 * SBCL needs it too, but I haven't actually verified that it's
 * right. It would be good to find a way to force the flow of
 * control through here to test it. */
jmp *CLOSURE_FUN_OFFSET(%rax)
SIZE(GNAME(closure_tramp))
.text
.align align_16byte,0x90
.globl GNAME(funcallable_instance_tramp)
#if !defined(LISP_FEATURE_DARWIN)
TYPE(GNAME(funcallable_instance_tramp))
#endif
/* Entered with %rax pointing at a funcallable instance: fetch its
 * function slot and jump through that function's entry word. */
GNAME(funcallable_instance_tramp):
mov FUNCALLABLE_INSTANCE_FUNCTION_OFFSET(%rax),%rax
/* KLUDGE: on this platform, whatever kind of function is in %rax
 * now, the first word of it contains the address to jump to. */
jmp *CLOSURE_FUN_OFFSET(%rax)
#if !defined(LISP_FEATURE_DARWIN)
SIZE(GNAME(funcallable_instance_tramp))
#endif
/*
* fun-end breakpoint magic
*/
/*
* For an explanation of the magic involved in function-end
* breakpoints, see the implementation in ppc-assem.S.
*/
/* Code template copied by the debugger to catch a function's return.
 * On entry carry set = multiple-value return; carry clear = single
 * value, which is converted below into a one-value MV return before
 * trapping. */
.text
.globl GNAME(fun_end_breakpoint_guts)
.align align_16byte
GNAME(fun_end_breakpoint_guts):
/* Multiple Value return */
jc multiple_value_return
/* Single value return: The eventual return will now use the
multiple values return convention but with a return values
count of one. */
mov %rsp,%rbx # Setup ebx - the ofp.
sub $8,%rsp # Allocate one stack slot for the return value
mov $8,%rcx # Setup ecx for one return value (presumably fixnumized 1 — cf. N_FIXNUM_TAG_BITS).
#if defined(LISP_FEATURE_DARWIN)
mov GSYM(NIL),%rdi # default second value
mov GSYM(NIL),%rsi # default third value
#else
mov $NIL,%rdi # default second value
mov $NIL,%rsi # default third value
#endif
multiple_value_return:
.globl GNAME(fun_end_breakpoint_trap)
.align align_16byte,0x90
GNAME(fun_end_breakpoint_trap):
TRAP
.byte trap_FunEndBreakpoint
hlt # We should never return here.
.globl GNAME(fun_end_breakpoint_end)
GNAME(fun_end_breakpoint_end):
.globl GNAME(do_pending_interrupt)
TYPE(GNAME(do_pending_interrupt))
.align align_16byte,0x90
/* Trap into the runtime so a deferred (pending) interrupt can be
 * processed by the signal machinery. */
GNAME(do_pending_interrupt):
TRAP
.byte trap_PendingInterrupt
ret
SIZE(GNAME(do_pending_interrupt))
.globl GNAME(post_signal_tramp)
TYPE(GNAME(post_signal_tramp))
.align align_16byte,0x90
/* Pops a full register context (pushed elsewhere by the signal code)
 * and returns.  The pop order below is the reverse of that context's
 * layout; RBP/RSP slots are intentionally skipped and handled by the
 * final leave/ret. */
GNAME(post_signal_tramp):
/* this is notionally the second half of a function whose first half
 * doesn't exist. This is where call_into_lisp returns when called
 * using return_to_lisp_function */
popq %r15
popq %r14
popq %r13
popq %r12
popq %r11
popq %r10
popq %r9
popq %r8
popq %rdi
popq %rsi
/* skip RBP and RSP */
popq %rbx
popq %rdx
popq %rcx
popq %rax
popfq # restore saved flags
leave
ret
SIZE(GNAME(post_signal_tramp))
/* When LISP_FEATURE_C_STACK_IS_CONTROL_STACK, we cannot safely scrub
 * the control stack from C, largely due to not knowing where the
 * active stack frame ends. On such platforms, we reimplement the
 * core scrubbing logic in assembly, in this case here:
 */
.text
.align align_16byte,0x90
.globl GNAME(arch_scrub_control_stack)
TYPE(GNAME(arch_scrub_control_stack))
GNAME(arch_scrub_control_stack):
/* We are passed three parameters:
 * A (struct thread *) in RDI,
 * the address of the guard page in RSI, and
 * the address of the hard guard page in RDX.
 * We may trash RAX, RCX, and R8-R11 with impunity.
 * [RSP] is our return address, [RSP-8] is the first
 * stack slot to scrub. */
/* We start by setting up our scrub pointer in RAX, our
 * guard page upper bound in R8, and our hard guard
 * page upper bound in R9. */
lea -8(%rsp), %rax
#ifdef LISP_FEATURE_DARWIN
mov GSYM(GNAME(os_vm_page_size)),%r9
#else
mov os_vm_page_size,%r9
#endif
lea (%rsi,%r9), %r8
lea (%rdx,%r9), %r9
/* Now we begin our main scrub loop. */
ascs_outer_loop:
/* If we're about to scrub the hard guard page, exit. */
cmp %r9, %rax
jae ascs_check_guard_page
cmp %rax, %rdx
jbe ascs_finished
ascs_check_guard_page:
/* If we're about to scrub the guard page, and the guard
 * page is protected, exit. */
cmp %r8, %rax
jae ascs_clear_loop
cmp %rax, %rsi
ja ascs_clear_loop
cmpq $(NIL), THREAD_CONTROL_STACK_GUARD_PAGE_PROTECTED_OFFSET(%rdi)
jne ascs_finished
/* Clear memory backwards to the start of the (4KiB) page */
ascs_clear_loop:
movq $0, (%rax)
test $0xfff, %rax # at a 4KiB boundary yet?
lea -8(%rax), %rax # decrement; lea leaves the flags from 'test' intact
jnz ascs_clear_loop
/* If we're about to hit the hard guard page, exit. */
cmp %r9, %rax
jae ascs_finished
/* If the next (previous?) 4KiB page contains a non-zero
 * word, continue scrubbing. */
ascs_check_loop:
testq $-1, (%rax) # any bits set in this word?
jnz ascs_outer_loop
test $0xfff, %rax
lea -8(%rax), %rax # lea preserves the flags being tested by jnz
jnz ascs_check_loop
ascs_finished:
ret
SIZE(GNAME(arch_scrub_control_stack))