You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(32) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
1
(3) |
2
(26) |
3
(15) |
4
(19) |
5
(16) |
6
(16) |
7
(13) |
|
8
(1) |
9
(12) |
10
|
11
(4) |
12
(17) |
13
(21) |
14
(15) |
|
15
(12) |
16
(14) |
17
(14) |
18
(12) |
19
(16) |
20
(27) |
21
(37) |
|
22
(25) |
23
(23) |
24
(14) |
25
(14) |
26
(14) |
27
(14) |
28
(11) |
|
29
(3) |
30
(13) |
|
|
|
|
|
|
From: <sv...@va...> - 2012-04-20 00:14:10
|
sewardj 2012-04-20 01:14:02 +0100 (Fri, 20 Apr 2012)
New Revision: 12512
Log:
Add translation chaining support for ppc32 (tested) and to
a large extent for ppc64 (incomplete, untested) (Valgrind side)
Modified files:
branches/TCHAIN/coregrind/m_dispatch/dispatch-arm-linux.S
branches/TCHAIN/coregrind/m_dispatch/dispatch-ppc32-linux.S
branches/TCHAIN/coregrind/m_dispatch/dispatch-x86-linux.S
branches/TCHAIN/docs/internals/t-chaining-notes.txt
branches/TCHAIN/memcheck/mc_machine.c
Modified: branches/TCHAIN/memcheck/mc_machine.c (+1 -1)
===================================================================
--- branches/TCHAIN/memcheck/mc_machine.c 2012-04-19 23:38:24 +01:00 (rev 12511)
+++ branches/TCHAIN/memcheck/mc_machine.c 2012-04-20 01:14:02 +01:00 (rev 12512)
@@ -393,7 +393,7 @@
if (o == GOF(CIA) && sz == 4) return -1;
if (o == GOF(IP_AT_SYSCALL) && sz == 4) return -1; /* slot unused */
- if (o == GOF(FPROUND) && sz == 4) return -1;
+ if (o == GOF(FPROUND) && sz == 1) return -1;
if (o == GOF(VRSAVE) && sz == 4) return -1;
if (o == GOF(EMWARN) && sz == 4) return -1;
if (o == GOF(TISTART) && sz == 4) return -1;
Modified: branches/TCHAIN/coregrind/m_dispatch/dispatch-x86-linux.S (+3 -2)
===================================================================
--- branches/TCHAIN/coregrind/m_dispatch/dispatch-x86-linux.S 2012-04-19 23:38:24 +01:00 (rev 12511)
+++ branches/TCHAIN/coregrind/m_dispatch/dispatch-x86-linux.S 2012-04-20 01:14:02 +01:00 (rev 12512)
@@ -39,8 +39,9 @@
/*------------------------------------------------------------*/
/*--- ---*/
-/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/
-/*--- run all translations except no-redir ones. ---*/
+/*--- The dispatch loop. VG_(disp_run_translations) is ---*/
+/*--- used to run all translations, ---*/
+/*--- including no-redir ones. ---*/
/*--- ---*/
/*------------------------------------------------------------*/
Modified: branches/TCHAIN/coregrind/m_dispatch/dispatch-ppc32-linux.S (+218 -342)
===================================================================
--- branches/TCHAIN/coregrind/m_dispatch/dispatch-ppc32-linux.S 2012-04-19 23:38:24 +01:00 (rev 12511)
+++ branches/TCHAIN/coregrind/m_dispatch/dispatch-ppc32-linux.S 2012-04-20 01:14:02 +01:00 (rev 12512)
@@ -39,24 +39,28 @@
/*------------------------------------------------------------*/
/*--- ---*/
-/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/
-/*--- run all translations except no-redir ones. ---*/
+/*--- The dispatch loop. VG_(disp_run_translations) is ---*/
+/*--- used to run all translations, ---*/
+/*--- including no-redir ones. ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/*----------------------------------------------------*/
-/*--- Preamble (set everything up) ---*/
+/*--- Entry and preamble (set everything up) ---*/
/*----------------------------------------------------*/
/* signature:
-UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
+void VG_(disp_run_translations)( UWord* two_words,
+ void* guest_state,
+ Addr host_addr );
*/
.text
-.globl VG_(run_innerloop)
-.type VG_(run_innerloop), @function
-VG_(run_innerloop):
- /* r3 holds guest_state */
- /* r4 holds do_profiling */
+.globl VG_(disp_run_translations)
+.type VG_(disp_run_translations), @function
+VG_(disp_run_translations):
+ /* r3 holds two_words */
+ /* r4 holds guest_state */
+ /* r5 holds host_addr */
/* ----- entry point to VG_(run_innerloop) ----- */
/* For Linux/ppc32 we need the SysV ABI, which uses
@@ -66,17 +70,17 @@
*/
/* Save lr */
- mflr 0
- stw 0,4(1)
+ mflr 6
+ stw 6,4(1)
/* New stack frame */
stwu 1,-496(1) /* sp should maintain 16-byte alignment */
/* Save callee-saved registers... */
- /* r3, r4 are live here, so use r5 */
- lis 5,VG_(machine_ppc32_has_FP)@ha
- lwz 5,VG_(machine_ppc32_has_FP)@l(5)
- cmplwi 5,0
+ /* r3, r4, r5 are live here, so use r6 */
+ lis 6,VG_(machine_ppc32_has_FP)@ha
+ lwz 6,VG_(machine_ppc32_has_FP)@l(6)
+ cmplwi 6,0
beq LafterFP1
/* Floating-point reg save area : 144 bytes */
@@ -119,67 +123,65 @@
stw 16,288(1)
stw 15,284(1)
stw 14,280(1)
- /* Probably not necessary to save r13 (thread-specific ptr),
- as VEX stays clear of it... but what the hey. */
stw 13,276(1)
+ stw 3,272(1) /* save two_words for later */
/* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
The Linux kernel might not actually use VRSAVE for its intended
purpose, but it should be harmless to preserve anyway. */
- /* r3, r4 are live here, so use r5 */
- lis 5,VG_(machine_ppc32_has_VMX)@ha
- lwz 5,VG_(machine_ppc32_has_VMX)@l(5)
- cmplwi 5,0
+ /* r3, r4, r5 are live here, so use r6 */
+ lis 6,VG_(machine_ppc32_has_VMX)@ha
+ lwz 6,VG_(machine_ppc32_has_VMX)@l(6)
+ cmplwi 6,0
beq LafterVMX1
#ifdef HAS_ALTIVEC
/* VRSAVE save word : 32 bytes */
- mfspr 5,256 /* vrsave reg is spr number 256 */
- stw 5,244(1)
+ mfspr 6,256 /* vrsave reg is spr number 256 */
+ stw 6,244(1)
/* Alignment padding : 4 bytes */
/* Vector reg save area (quadword aligned) : 192 bytes */
- li 5,224
- stvx 31,5,1
- li 5,208
- stvx 30,5,1
- li 5,192
- stvx 29,5,1
- li 5,176
- stvx 28,5,1
- li 5,160
- stvx 27,5,1
- li 5,144
- stvx 26,5,1
- li 5,128
- stvx 25,5,1
- li 5,112
- stvx 25,5,1
- li 5,96
- stvx 23,5,1
- li 5,80
- stvx 22,5,1
- li 5,64
- stvx 21,5,1
- li 5,48
- stvx 20,5,1
+ li 6,224
+ stvx 31,6,1
+ li 6,208
+ stvx 30,6,1
+ li 6,192
+ stvx 29,6,1
+ li 6,176
+ stvx 28,6,1
+ li 6,160
+ stvx 27,6,1
+ li 6,144
+ stvx 26,6,1
+ li 6,128
+ stvx 25,6,1
+ li 6,112
+ stvx 25,6,1
+ li 6,96
+ stvx 23,6,1
+ li 6,80
+ stvx 22,6,1
+ li 6,64
+ stvx 21,6,1
+ li 6,48
+ stvx 20,6,1
#endif
LafterVMX1:
/* Save cr */
- mfcr 0
- stw 0,44(1)
+ mfcr 6
+ stw 6,44(1)
/* Local variable space... */
/* 32(sp) used later to check FPSCR[RM] */
- /* r3 holds guest_state */
- /* r4 holds do_profiling */
- mr 31,3 /* r31 (generated code gsp) = r3 */
- stw 3,28(1) /* spill orig guest_state ptr */
+ /* r3 holds two_words */
+ /* r4 holds guest_state */
+ /* r5 holds host_addr */
/* 24(sp) used later to stop ctr reg being clobbered */
/* 20(sp) used later to load fpscr with zero */
@@ -190,36 +192,29 @@
0(sp) : back-chain
*/
- /* CAB TODO: Use a caller-saved reg for orig guest_state ptr
- - rem to set non-allocateable in isel.c */
-
- /* hold dispatch_ctr in r29 */
- lis 5,VG_(dispatch_ctr)@ha
- lwz 29,VG_(dispatch_ctr)@l(5)
-
/* set host FPU control word to the default mode expected
by VEX-generated code. See comments in libvex.h for
more info. */
- lis 5,VG_(machine_ppc32_has_FP)@ha
- lwz 5,VG_(machine_ppc32_has_FP)@l(5)
- cmplwi 5,0
+ lis 6,VG_(machine_ppc32_has_FP)@ha
+ lwz 6,VG_(machine_ppc32_has_FP)@l(6)
+ cmplwi 6,0
beq LafterFP2
/* get zero into f3 (tedious) */
/* note: fsub 3,3,3 is not a reliable way to do this,
since if f3 holds a NaN or similar then we don't necessarily
wind up with zero. */
- li 5,0
- stw 5,20(1)
+ li 6,0
+ stw 6,20(1)
lfs 3,20(1)
mtfsf 0xFF,3 /* fpscr = f3 */
LafterFP2:
/* set host AltiVec control word to the default mode expected
by VEX-generated code. */
- lis 5,VG_(machine_ppc32_has_VMX)@ha
- lwz 5,VG_(machine_ppc32_has_VMX)@l(5)
- cmplwi 5,0
+ lis 6,VG_(machine_ppc32_has_VMX)@ha
+ lwz 6,VG_(machine_ppc32_has_VMX)@l(6)
+ cmplwi 6,0
beq LafterVMX2
#ifdef HAS_ALTIVEC
@@ -232,172 +227,40 @@
/* make a stack frame for the code we are calling */
stwu 1,-16(1)
- /* fetch %CIA into r3 */
- lwz 3,OFFSET_ppc32_CIA(31)
+ /* Set up the guest state ptr */
+ mr 31,4 /* r31 (generated code gsp) = r4 */
- /* fall into main loop (the right one) */
- /* r4 = do_profiling. It's probably trashed after here,
- but that's OK: we don't need it after here. */
- cmplwi 4,0
- beq VG_(run_innerloop__dispatch_unprofiled)
- b VG_(run_innerloop__dispatch_profiled)
+ /* and jump into the code cache. Chained translations in
+ the code cache run, until for whatever reason, they can't
+ continue. When that happens, the translation in question
+ will jump (or call) to one of the continuation points
+ VG_(cp_...) below. */
+ mtctr 5
+ bctr
/*NOTREACHED*/
/*----------------------------------------------------*/
-/*--- NO-PROFILING (standard) dispatcher ---*/
+/*--- Postamble and exit. ---*/
/*----------------------------------------------------*/
-.global VG_(run_innerloop__dispatch_unprofiled)
-VG_(run_innerloop__dispatch_unprofiled):
- /* At entry: Live regs:
- r1 (=sp)
- r3 (=CIA = next guest address)
- r29 (=dispatch_ctr)
- r31 (=guest_state)
- */
- /* Has the guest state pointer been messed with? If yes, exit.
- Also set up & VG_(tt_fast) early in an attempt at better
- scheduling. */
- lis 5,VG_(tt_fast)@ha
- addi 5,5,VG_(tt_fast)@l /* & VG_(tt_fast) */
- andi. 0,31,1
- bne gsp_changed
+postamble:
+ /* At this point, r6 and r7 contain two
+ words to be returned to the caller. r6
+ holds a TRC value, and r7 optionally may
+ hold another word (for CHAIN_ME exits, the
+ address of the place to patch.) */
- /* save the jump address in the guest state */
- stw 3,OFFSET_ppc32_CIA(31)
-
- /* Are we out of timeslice? If yes, defer to scheduler. */
- subi 29,29,1
- cmplwi 29,0
- beq counter_is_zero
-
- /* try a fast lookup in the translation cache */
- /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
- = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */
- rlwinm 4,3,1, 29-VG_TT_FAST_BITS, 28 /* entry# * 8 */
- add 5,5,4 /* & VG_(tt_fast)[entry#] */
- lwz 6,0(5) /* .guest */
- lwz 7,4(5) /* .host */
- cmpw 3,6
- bne fast_lookup_failed
-
- /* Found a match. Call .host. */
- mtctr 7
- bctrl
-
- /* On return from guest code:
- r3 holds destination (original) address.
- r31 may be unchanged (guest_state), or may indicate further
- details of the control transfer requested to *r3.
- */
- /* start over */
- b VG_(run_innerloop__dispatch_unprofiled)
- /*NOTREACHED*/
-
-/*----------------------------------------------------*/
-/*--- PROFILING dispatcher (can be much slower) ---*/
-/*----------------------------------------------------*/
-
-.global VG_(run_innerloop__dispatch_profiled)
-VG_(run_innerloop__dispatch_profiled):
- /* At entry: Live regs:
- r1 (=sp)
- r3 (=CIA = next guest address)
- r29 (=dispatch_ctr)
- r31 (=guest_state)
- */
- /* Has the guest state pointer been messed with? If yes, exit.
- Also set up & VG_(tt_fast) early in an attempt at better
- scheduling. */
- lis 5,VG_(tt_fast)@ha
- addi 5,5,VG_(tt_fast)@l /* & VG_(tt_fast) */
- andi. 0,31,1
- bne gsp_changed
-
- /* save the jump address in the guest state */
- stw 3,OFFSET_ppc32_CIA(31)
-
- /* Are we out of timeslice? If yes, defer to scheduler. */
- subi 29,29,1
- cmplwi 29,0
- beq counter_is_zero
-
- /* try a fast lookup in the translation cache */
- /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
- = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */
- rlwinm 4,3,1, 29-VG_TT_FAST_BITS, 28 /* entry# * 8 */
- add 5,5,4 /* & VG_(tt_fast)[entry#] */
- lwz 6,0(5) /* .guest */
- lwz 7,4(5) /* .host */
- cmpw 3,6
- bne fast_lookup_failed
-
- /* increment bb profile counter */
- srwi 4,4,1 /* entry# * sizeof(UInt*) */
- addis 6,4,VG_(tt_fastN)@ha
- lwz 9,VG_(tt_fastN)@l(6)
- lwz 8,0(9)
- addi 8,8,1
- stw 8,0(9)
-
- /* Found a match. Call .host. */
- mtctr 7
- bctrl
-
- /* On return from guest code:
- r3 holds destination (original) address.
- r31 may be unchanged (guest_state), or may indicate further
- details of the control transfer requested to *r3.
- */
- /* start over */
- b VG_(run_innerloop__dispatch_profiled)
- /*NOTREACHED*/
-
-/*----------------------------------------------------*/
-/*--- exit points ---*/
-/*----------------------------------------------------*/
-
-gsp_changed:
- /* Someone messed with the gsp (in r31). Have to
- defer to scheduler to resolve this. dispatch ctr
- is not yet decremented, so no need to increment. */
- /* %CIA is NOT up to date here. First, need to write
- %r3 back to %CIA, but without trashing %r31 since
- that holds the value we want to return to the scheduler.
- Hence use %r5 transiently for the guest state pointer. */
- lwz 5,44(1) /* original guest_state ptr */
- stw 3,OFFSET_ppc32_CIA(5)
- mr 3,31 /* r3 = new gsp value */
- b run_innerloop_exit
- /*NOTREACHED*/
-
-counter_is_zero:
- /* %CIA is up to date */
- /* back out decrement of the dispatch counter */
- addi 29,29,1
- li 3,VG_TRC_INNER_COUNTERZERO
- b run_innerloop_exit
-
-fast_lookup_failed:
- /* %CIA is up to date */
- /* back out decrement of the dispatch counter */
- addi 29,29,1
- li 3,VG_TRC_INNER_FASTMISS
- b run_innerloop_exit
-
-
-
-/* All exits from the dispatcher go through here.
- r3 holds the return value.
-*/
-run_innerloop_exit:
/* We're leaving. Check that nobody messed with
- VSCR or FPSCR. */
-
+ VSCR or FPSCR in ways we don't expect. */
/* Using r10 - value used again further on, so don't trash! */
lis 10,VG_(machine_ppc32_has_FP)@ha
lwz 10,VG_(machine_ppc32_has_FP)@l(10)
- cmplwi 10,0
+
+ /* Using r11 - value used again further on, so don't trash! */
+ lis 11,VG_(machine_ppc32_has_VMX)@ha
+ lwz 11,VG_(machine_ppc32_has_VMX)@l(11)
+
+ cmplwi 10,0 /* Do we have FP ? */
beq LafterFP8
/* Set fpscr back to a known state, since vex-generated code
@@ -410,10 +273,7 @@
mtfsf 0xFF,3 /* fpscr = f3 */
LafterFP8:
- /* Using r11 - value used again further on, so don't trash! */
- lis 11,VG_(machine_ppc32_has_VMX)@ha
- lwz 11,VG_(machine_ppc32_has_VMX)@l(11)
- cmplwi 11,0
+ cmplwi 11,0 /* Do we have altivec? */
beq LafterVMX8
#ifdef HAS_ALTIVEC
@@ -432,29 +292,15 @@
LafterVMX8:
/* otherwise we're OK */
- b run_innerloop_exit_REALLY
+ b remove_frame
-
invariant_violation:
- li 3,VG_TRC_INVARIANT_FAILED
- b run_innerloop_exit_REALLY
+ li 6,VG_TRC_INVARIANT_FAILED
+ li 7,0
+ /* fall through */
-run_innerloop_exit_REALLY:
- /* r3 holds VG_TRC_* value to return */
-
- /* Return to parent stack */
- addi 1,1,16
-
- /* Write ctr to VG(dispatch_ctr) */
- lis 5,VG_(dispatch_ctr)@ha
- stw 29,VG_(dispatch_ctr)@l(5)
-
- /* Restore cr */
- lwz 0,44(1)
- mtcr 0
-
- /* Restore callee-saved registers... */
-
+remove_frame:
+ /* Restore FP regs */
/* r10 already holds VG_(machine_ppc32_has_FP) value */
cmplwi 10,0
beq LafterFP9
@@ -480,31 +326,11 @@
lfd 14,352(1)
LafterFP9:
- /* General regs */
- lwz 31,348(1)
- lwz 30,344(1)
- lwz 29,340(1)
- lwz 28,336(1)
- lwz 27,332(1)
- lwz 26,328(1)
- lwz 25,324(1)
- lwz 24,320(1)
- lwz 23,316(1)
- lwz 22,312(1)
- lwz 21,308(1)
- lwz 20,304(1)
- lwz 19,300(1)
- lwz 18,296(1)
- lwz 17,292(1)
- lwz 16,288(1)
- lwz 15,284(1)
- lwz 14,280(1)
- lwz 13,276(1)
-
/* r11 already holds VG_(machine_ppc32_has_VMX) value */
cmplwi 11,0
beq LafterVMX9
+ /* Restore Altivec regs */
#ifdef HAS_ALTIVEC
/* VRSAVE */
lwz 4,244(1)
@@ -538,93 +364,143 @@
#endif
LafterVMX9:
- /* reset lr & sp */
+ /* restore int regs, including importantly r3 (two_words) */
+ addi 1,1,16
+ lwz 31,348(1)
+ lwz 30,344(1)
+ lwz 29,340(1)
+ lwz 28,336(1)
+ lwz 27,332(1)
+ lwz 26,328(1)
+ lwz 25,324(1)
+ lwz 24,320(1)
+ lwz 23,316(1)
+ lwz 22,312(1)
+ lwz 21,308(1)
+ lwz 20,304(1)
+ lwz 19,300(1)
+ lwz 18,296(1)
+ lwz 17,292(1)
+ lwz 16,288(1)
+ lwz 15,284(1)
+ lwz 14,280(1)
+ lwz 13,276(1)
+ lwz 3,272(1)
+ /* Stash return values */
+ stw 6,0(3)
+ stw 7,4(3)
+
+ /* restore lr & sp, and leave */
lwz 0,500(1) /* stack_size + 4 */
mtlr 0
addi 1,1,496 /* stack_size */
blr
-.size VG_(run_innerloop), .-VG_(run_innerloop)
-/*------------------------------------------------------------*/
-/*--- ---*/
-/*--- A special dispatcher, for running no-redir ---*/
-/*--- translations. Just runs the given translation once. ---*/
-/*--- ---*/
-/*------------------------------------------------------------*/
+/*----------------------------------------------------*/
+/*--- Continuation points ---*/
+/*----------------------------------------------------*/
-/* signature:
-void VG_(run_a_noredir_translation) ( UWord* argblock );
-*/
+/* ------ Chain me to slow entry point ------ */
+.global VG_(disp_cp_chain_me_to_slowEP)
+VG_(disp_cp_chain_me_to_slowEP):
+ /* We got called. The return address indicates
+ where the patching needs to happen. Collect
+ the return address and exit back to C land,
+ handing the caller the pair (Chain_me_S, RA). */
+ li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP
+ mflr 7
+ /* 8 = imm32 r30, disp_cp_chain_me_to_slowEP
+ 4 = mtctr r30
+ 4 = btctr
+ */
+ subi 7,7,8+4+4
+ b postamble
-/* Run a no-redir translation. argblock points to 4 UWords, 2 to carry args
- and 2 to carry results:
- 0: input: ptr to translation
- 1: input: ptr to guest state
- 2: output: next guest PC
- 3: output: guest state pointer afterwards (== thread return code)
-*/
-.global VG_(run_a_noredir_translation)
-.type VG_(run_a_noredir_translation), @function
-VG_(run_a_noredir_translation):
- /* save callee-save int regs, & lr */
- stwu 1,-256(1)
- stw 14,128(1)
- stw 15,132(1)
- stw 16,136(1)
- stw 17,140(1)
- stw 18,144(1)
- stw 19,148(1)
- stw 20,152(1)
- stw 21,156(1)
- stw 22,160(1)
- stw 23,164(1)
- stw 24,168(1)
- stw 25,172(1)
- stw 26,176(1)
- stw 27,180(1)
- stw 28,184(1)
- stw 29,188(1)
- stw 30,192(1)
- stw 31,196(1)
- mflr 31
- stw 31,200(1)
+/* ------ Chain me to fast entry point ------ */
+.global VG_(disp_cp_chain_me_to_fastEP)
+VG_(disp_cp_chain_me_to_fastEP):
+ /* We got called. The return address indicates
+ where the patching needs to happen. Collect
+ the return address and exit back to C land,
+ handing the caller the pair (Chain_me_F, RA). */
+ li 6, VG_TRC_CHAIN_ME_TO_FAST_EP
+ mflr 7
+ /* 8 = imm32 r30, disp_cp_chain_me_to_fastEP
+ 4 = mtctr r30
+ 4 = btctr
+ */
+ subi 7,7,8+4+4
+ b postamble
- stw 3,204(1)
- lwz 31,4(3)
- lwz 30,0(3)
- mtlr 30
- blrl
+/* ------ Indirect but boring jump ------ */
+.global VG_(disp_cp_xindir)
+VG_(disp_cp_xindir):
+ /* Where are we going? */
+ lwz 3,OFFSET_ppc32_CIA(31)
- lwz 4,204(1)
- stw 3, 8(4)
- stw 31,12(4)
+ /* stats only */
+ lis 5,VG_(stats__n_xindirs)@ha
+ addi 5,5,VG_(stats__n_xindirs)@l
+ lwz 6,4(5)
+ addic. 6,6,1
+ stw 6,4(5)
+ lwz 6,0(5)
+ addze 6,6
+ stw 6,0(5)
+
+ /* r5 = &VG_(tt_fast) */
+ lis 5,VG_(tt_fast)@ha
+ addi 5,5,VG_(tt_fast)@l /* & VG_(tt_fast) */
- lwz 14,128(1)
- lwz 15,132(1)
- lwz 16,136(1)
- lwz 17,140(1)
- lwz 18,144(1)
- lwz 19,148(1)
- lwz 20,152(1)
- lwz 21,156(1)
- lwz 22,160(1)
- lwz 23,164(1)
- lwz 24,168(1)
- lwz 25,172(1)
- lwz 26,176(1)
- lwz 27,180(1)
- lwz 28,184(1)
- lwz 29,188(1)
- lwz 30,192(1)
- lwz 31,200(1)
- mtlr 31
- lwz 31,196(1)
- addi 1,1,256
- blr
-.size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)
+ /* try a fast lookup in the translation cache */
+ /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
+ = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */
+ rlwinm 4,3,1, 29-VG_TT_FAST_BITS, 28 /* entry# * 8 */
+ add 5,5,4 /* & VG_(tt_fast)[entry#] */
+ lwz 6,0(5) /* .guest */
+ lwz 7,4(5) /* .host */
+ cmpw 3,6
+ bne fast_lookup_failed
+ /* Found a match. Jump to .host. */
+ mtctr 7
+ bctr
+fast_lookup_failed:
+ /* stats only */
+ lis 5,VG_(stats__n_xindir_misses)@ha
+ addi 5,5,VG_(stats__n_xindir_misses)@l
+ lwz 6,4(5)
+ addic. 6,6,1
+ stw 6,4(5)
+ lwz 6,0(5)
+ addze 6,6
+ stw 6,0(5)
+
+ li 6,VG_TRC_INNER_FASTMISS
+ li 7,0
+ b postamble
+ /*NOTREACHED*/
+
+/* ------ Assisted jump ------ */
+.global VG_(disp_cp_xassisted)
+VG_(disp_cp_xassisted):
+ /* r31 contains the TRC */
+ mr 6,31
+ li 7,0
+ b postamble
+
+/* ------ Event check failed ------ */
+.global VG_(disp_cp_evcheck_fail)
+VG_(disp_cp_evcheck_fail):
+ li 6,VG_TRC_INNER_COUNTERZERO
+ li 7,0
+ b postamble
+
+
+.size VG_(disp_run_translations), .-VG_(disp_run_translations)
+
/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits
Modified: branches/TCHAIN/coregrind/m_dispatch/dispatch-arm-linux.S (+3 -2)
===================================================================
--- branches/TCHAIN/coregrind/m_dispatch/dispatch-arm-linux.S 2012-04-19 23:38:24 +01:00 (rev 12511)
+++ branches/TCHAIN/coregrind/m_dispatch/dispatch-arm-linux.S 2012-04-20 01:14:02 +01:00 (rev 12512)
@@ -40,8 +40,9 @@
/*------------------------------------------------------------*/
/*--- ---*/
-/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/
-/*--- run all translations except no-redir ones. ---*/
+/*--- The dispatch loop. VG_(disp_run_translations) is ---*/
+/*--- used to run all translations, ---*/
+/*--- including no-redir ones. ---*/
/*--- ---*/
/*------------------------------------------------------------*/
Modified: branches/TCHAIN/docs/internals/t-chaining-notes.txt (+12 -2)
===================================================================
--- branches/TCHAIN/docs/internals/t-chaining-notes.txt 2012-04-19 23:38:24 +01:00 (rev 12511)
+++ branches/TCHAIN/docs/internals/t-chaining-notes.txt 2012-04-20 01:14:02 +01:00 (rev 12512)
@@ -1,6 +1,6 @@
DO NOT MERGE
-~~~~~~~~~~~
+~~~~~~~~~~~~
Changes memcheck/tests/Makefile.am w.r.t. -mfloat-abi=softfp
Ditto none/tests/arm/Makefile.am
@@ -28,16 +28,26 @@
records from the patchers, instead of {0,0}, so that transparent
self hosting works properly.
+host_ppc_defs.h: is RdWrLR still needed? If not delete.
+ditto ARM, Ld8S
+
+make sure IRStmt_Exit3 is completely gone.
+
+all backends: iselStmt(Ist_Exit) vs iselNext: make sure that the same
+JKs are handled, else it's not safe against branch sense switching
+
+
Optimisations
~~~~~~~~~~~~~
all targets: change VG_(stats__n_xindirs) to a 32 bit counter, and
-empty out every now and again.
+empty out every now and again. Ditto VG_(stats__n_xindir_misses).
amd64: XDirect: write const value to guest_RIP using single
insn when the value is < 0x8000'0000
arm: chain_XDirect: generate short form jumps when possible
+ppc: chain_XDirect: generate short form jumps when possible
arm codegen: Generate ORRS for CmpwNEZ32(Or32(x,y))
|
|
From: <sv...@va...> - 2012-04-20 00:13:37
|
sewardj 2012-04-20 01:13:28 +0100 (Fri, 20 Apr 2012)
New Revision: 2289
Log:
Add translation chaining support for ppc32 (tested) and to
a large extent for ppc64 (incomplete, untested) (VEX side)
Modified files:
branches/TCHAIN/priv/guest_ppc_defs.h
branches/TCHAIN/priv/guest_ppc_helpers.c
branches/TCHAIN/priv/guest_ppc_toIR.c
branches/TCHAIN/priv/host_arm_defs.c
branches/TCHAIN/priv/host_ppc_defs.c
branches/TCHAIN/priv/host_ppc_defs.h
branches/TCHAIN/priv/host_ppc_isel.c
branches/TCHAIN/priv/main_main.c
branches/TCHAIN/pub/libvex_guest_ppc32.h
branches/TCHAIN/pub/libvex_guest_ppc64.h
Modified: branches/TCHAIN/pub/libvex_guest_ppc32.h (+6 -0)
===================================================================
--- branches/TCHAIN/pub/libvex_guest_ppc32.h 2012-04-19 15:23:48 +01:00 (rev 2288)
+++ branches/TCHAIN/pub/libvex_guest_ppc32.h 2012-04-20 01:13:28 +01:00 (rev 2289)
@@ -48,6 +48,12 @@
typedef
struct {
+ /* Event check fail addr and counter. */
+ /* 0 */ UInt host_EvC_FAILADDR;
+ /* 4 */ UInt host_EvC_COUNTER;
+ /* 8 */ UInt pad3;
+ /* 12 */ UInt pad4;
+ /* Add 16 to all the numbers below. Sigh. */
/* General Purpose Registers */
/* 0 */ UInt guest_GPR0;
/* 4 */ UInt guest_GPR1;
Modified: branches/TCHAIN/priv/guest_ppc_defs.h (+0 -1)
===================================================================
--- branches/TCHAIN/priv/guest_ppc_defs.h 2012-04-19 15:23:48 +01:00 (rev 2288)
+++ branches/TCHAIN/priv/guest_ppc_defs.h 2012-04-20 01:13:28 +01:00 (rev 2289)
@@ -48,7 +48,6 @@
bb_to_IR.h. */
extern
DisResult disInstr_PPC ( IRSB* irbb,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
Modified: branches/TCHAIN/priv/host_arm_defs.c (+2 -2)
===================================================================
--- branches/TCHAIN/priv/host_arm_defs.c 2012-04-19 15:23:48 +01:00 (rev 2288)
+++ branches/TCHAIN/priv/host_arm_defs.c 2012-04-20 01:13:28 +01:00 (rev 2289)
@@ -4488,7 +4488,7 @@
}
-/* How big is an event check? See case for Ain_EvCheck in
+/* How big is an event check? See case for ARMin_EvCheck in
emit_ARMInstr just above. That crosschecks what this returns, so
we can tell if we're inconsistent. */
Int evCheckSzB_ARM ( void )
@@ -4569,7 +4569,7 @@
/* Patch the counter address into a profile inc point, as previously
- created by the Xin_ProfInc case for emit_ARMInstr. */
+ created by the ARMin_ProfInc case for emit_ARMInstr. */
VexInvalRange patchProfInc_ARM ( void* place_to_patch,
ULong* location_of_counter )
{
Modified: branches/TCHAIN/priv/host_ppc_isel.c (+204 -57)
===================================================================
--- branches/TCHAIN/priv/host_ppc_isel.c 2012-04-19 15:23:48 +01:00 (rev 2288)
+++ branches/TCHAIN/priv/host_ppc_isel.c 2012-04-20 01:13:28 +01:00 (rev 2289)
@@ -218,18 +218,21 @@
- A mapping from IRTemp to HReg. This tells the insn selector
which virtual register(s) are associated with each IRTemp
- temporary. This is computed before insn selection starts, and
- does not change. We expect this mapping to map precisely the
- same set of IRTemps as the type mapping does.
+ temporary. This is computed before insn selection starts, and
+ does not change. We expect this mapping to map precisely the
+ same set of IRTemps as the type mapping does.
- - vregmap holds the primary register for the IRTemp.
- - vregmapHI holds the secondary register for the IRTemp,
+ - vregmapLo holds the primary register for the IRTemp.
+ - vregmapMedLo holds the secondary register for the IRTemp,
if any is needed. That's only for Ity_I64 temps
in 32 bit mode or Ity_I128 temps in 64-bit mode.
+ - vregmapMedHi is only for dealing with Ity_I128 temps in
+ 32 bit mode. It holds bits 95:64 (Intel numbering)
+ of the IRTemp.
+ - vregmapHi is also only for dealing with Ity_I128 temps
+ in 32 bit mode. It holds the most significant bits
+ (127:96 in Intel numbering) of the IRTemp.
- - The name of the vreg in which we stash a copy of the link reg,
- so helper functions don't kill it.
-
- The code array, that is, the insns selected so far.
- A counter, for generating new virtual registers.
@@ -247,11 +250,20 @@
described in set_FPU_rounding_mode below.
- A VexMiscInfo*, needed for knowing how to generate
- function calls for this target
+ function calls for this target.
+
+ - The maximum guest address of any guest insn in this block.
+ Actually, the address of the highest-addressed byte from any
+ insn in this block. Is set at the start and does not change.
+ This is used for detecting jumps which are definitely
+ forward-edges from this block, and therefore can be made
+ (chained) to the fast entry point of the destination, thereby
+ avoiding the destination's event check.
*/
typedef
struct {
+ /* Constant -- are set at the start and do not change. */
IRTypeEnv* type_env;
// 64-bit mode 32-bit mode
HReg* vregmapLo; // Low 64-bits [63:0] Low 32-bits [31:0]
@@ -260,20 +272,21 @@
HReg* vregmapHi; // unused highest 32-bits [127:96]
Int n_vregmap;
- HReg savedLR;
-
- HInstrArray* code;
-
- Int vreg_ctr;
-
/* 27 Jan 06: Not currently used, but should be */
UInt hwcaps;
Bool mode64;
+ VexAbiInfo* vbi;
+
+ Bool chainingAllowed;
+ Addr64 max_ga;
+
+ /* These are modified as we go along. */
+ HInstrArray* code;
+ Int vreg_ctr;
+
IRExpr* previous_rm;
-
- VexAbiInfo* vbi;
}
ISelEnv;
@@ -4545,18 +4558,60 @@
/* --------- EXIT --------- */
case Ist_Exit: {
- PPCRI* ri_dst;
- PPCCondCode cc;
- IRConstTag tag = stmt->Ist.Exit.dst->tag;
- if (!mode64 && (tag != Ico_U32))
+ IRConst* dst = stmt->Ist.Exit.dst;
+ if (!mode64 && dst->tag != Ico_U32)
vpanic("iselStmt(ppc): Ist_Exit: dst is not a 32-bit value");
- if (mode64 && (tag != Ico_U64))
+ if (mode64 && dst->tag != Ico_U64)
vpanic("iselStmt(ppc64): Ist_Exit: dst is not a 64-bit value");
- ri_dst = iselWordExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
- cc = iselCondCode(env,stmt->Ist.Exit.guard);
- addInstr(env, PPCInstr_RdWrLR(True, env->savedLR));
- addInstr(env, PPCInstr_Goto(stmt->Ist.Exit.jk, cc, ri_dst));
- return;
+
+ PPCCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
+ PPCAMode* amCIA = PPCAMode_IR(stmt->Ist.Exit.offsIP,
+ hregPPC_GPR31(mode64));
+
+ /* Case: boring transfer to known address */
+ if (stmt->Ist.Exit.jk == Ijk_Boring
+ || stmt->Ist.Exit.jk == Ijk_Call
+ /* || stmt->Ist.Exit.jk == Ijk_Ret */) {
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = mode64
+ ? (((Addr64)stmt->Ist.Exit.dst->Ico.U64) > (Addr64)env->max_ga)
+ : (((Addr32)stmt->Ist.Exit.dst->Ico.U32) > (Addr32)env->max_ga);
+ if (0) vex_printf("%s", toFastEP ? "Y" : ",");
+ addInstr(env, PPCInstr_XDirect(
+ mode64 ? (Addr64)stmt->Ist.Exit.dst->Ico.U64
+ : (Addr64)stmt->Ist.Exit.dst->Ico.U32,
+ amCIA, cc, toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, PPCInstr_XAssisted(r, amCIA, cc, Ijk_Boring));
+ }
+ return;
+ }
+
+ /* Case: assisted transfer to arbitrary address */
+ switch (stmt->Ist.Exit.jk) {
+ //case Ijk_MapFail:
+ //case Ijk_SigSEGV: case Ijk_TInval: case Ijk_EmWarn:
+ case Ijk_NoDecode: case Ijk_SigBUS: case Ijk_SigTRAP:
+ {
+ HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, PPCInstr_XAssisted(r, amCIA, cc,
+ stmt->Ist.Exit.jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Do we ever expect to see any other kind? */
+ goto stmt_fail;
}
default: break;
@@ -4571,21 +4626,91 @@
/*--- ISEL: Basic block terminators (Nexts) ---*/
/*---------------------------------------------------------*/
-static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
+static void iselNext ( ISelEnv* env,
+ IRExpr* next, IRJumpKind jk, Int offsIP )
{
- PPCCondCode cond;
- PPCRI* ri;
if (vex_traceflags & VEX_TRACE_VCODE) {
- vex_printf("\n-- goto {");
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
ppIRJumpKind(jk);
- vex_printf("} ");
- ppIRExpr(next);
- vex_printf("\n");
+ vex_printf( "\n");
}
- cond = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
- ri = iselWordExpr_RI(env, next);
- addInstr(env, PPCInstr_RdWrLR(True, env->savedLR));
- addInstr(env, PPCInstr_Goto(jk, cond, ri));
+
+ PPCCondCode always = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
+
+ /* Case: boring transfer to known address */
+ if (next->tag == Iex_Const) {
+ IRConst* cdst = next->Iex.Const.con;
+ vassert(cdst->tag == (env->mode64 ? Ico_U64 :Ico_U32));
+ if (jk == Ijk_Boring || jk == Ijk_Call) {
+ /* Boring transfer to known address */
+ PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = env->mode64
+ ? (((Addr64)cdst->Ico.U64) > (Addr64)env->max_ga)
+ : (((Addr32)cdst->Ico.U32) > (Addr32)env->max_ga);
+ if (0) vex_printf("%s", toFastEP ? "X" : ".");
+ addInstr(env, PPCInstr_XDirect(
+ env->mode64 ? (Addr64)cdst->Ico.U64
+ : (Addr64)cdst->Ico.U32,
+ amCIA, always, toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselWordExpr_R(env, next);
+ addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
+ Ijk_Boring));
+ }
+ return;
+ }
+ }
+
+ /* Case: call/return (==boring) transfer to any address */
+ switch (jk) {
+ case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
+ HReg r = iselWordExpr_R(env, next);
+ PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
+ if (env->chainingAllowed) {
+ addInstr(env, PPCInstr_XIndir(r, amCIA, always));
+ } else {
+ addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
+ Ijk_Boring));
+ }
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Case: some other kind of transfer to any address */
+ switch (jk) {
+ case Ijk_Sys_syscall: case Ijk_ClientReq: case Ijk_NoDecode:
+ case Ijk_EmWarn: case Ijk_SigTRAP: case Ijk_TInval:
+ case Ijk_NoRedir:
+ //case Ijk_Sys_int128:
+ //case Ijk_Yield:
+ {
+ HReg r = iselWordExpr_R(env, next);
+ PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
+ addInstr(env, PPCInstr_XAssisted(r, amCIA, always, jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
+ ppIRJumpKind(jk);
+ vex_printf( "\n");
+ vassert(0); // are we expecting any other kind?
}
@@ -4593,20 +4718,29 @@
/*--- Insn selector top-level ---*/
/*---------------------------------------------------------*/
-/* Translate an entire BS to ppc code. */
-HInstrArray* iselSB_PPC ( IRSB* bb, VexArch arch_host,
+/* Translate an entire SB to ppc code. */
+HInstrArray* iselSB_PPC ( IRSB* bb,
+ VexArch arch_host,
VexArchInfo* archinfo_host,
- VexAbiInfo* vbi )
+ VexAbiInfo* vbi,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga )
{
- Int i, j;
- HReg hregLo, hregMedLo, hregMedHi, hregHi;
- ISelEnv* env;
- UInt hwcaps_host = archinfo_host->hwcaps;
- Bool mode64 = False;
- UInt mask32, mask64;
+ Int i, j;
+ HReg hregLo, hregMedLo, hregMedHi, hregHi;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
+ Bool mode64 = False;
+ UInt mask32, mask64;
+ PPCAMode *amCounter, *amFailAddr;
+
vassert(arch_host == VexArchPPC32 || arch_host == VexArchPPC64);
mode64 = arch_host == VexArchPPC64;
+ if (mode64) vassert(max_ga <= 0xFFFFFFFFULL);
/* do some sanity checks */
mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
@@ -4643,15 +4777,20 @@
env->n_vregmap = bb->tyenv->types_used;
env->vregmapLo = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
env->vregmapMedLo = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
- if (!mode64) {
+ if (mode64) {
+ env->vregmapMedHi = NULL;
+ env->vregmapHi = NULL;
+ } else {
env->vregmapMedHi = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
env->vregmapHi = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
}
/* and finally ... */
- env->hwcaps = hwcaps_host;
- env->previous_rm = NULL;
- env->vbi = vbi;
+ env->chainingAllowed = chainingAllowed;
+ env->max_ga = max_ga;
+ env->hwcaps = hwcaps_host;
+ env->previous_rm = NULL;
+ env->vbi = vbi;
/* For each IR temporary, allocate a suitably-kinded virtual
register. */
@@ -4698,16 +4837,24 @@
}
env->vreg_ctr = j;
- /* Keep a copy of the link reg, so helper functions don't kill it. */
- env->savedLR = newVRegI(env);
- addInstr(env, PPCInstr_RdWrLR(False, env->savedLR));
+ /* The very first instruction must be an event check. */
+ amCounter = PPCAMode_IR(offs_Host_EvC_Counter, hregPPC_GPR31(mode64));
+ amFailAddr = PPCAMode_IR(offs_Host_EvC_FailAddr, hregPPC_GPR31(mode64));
+ addInstr(env, PPCInstr_EvCheck(amCounter, amFailAddr));
+ /* Possibly a block counter increment (for profiling). At this
+ point we don't know the address of the counter, so just pretend
+ it is zero. It will have to be patched later, but before this
+ translation is used, by a call to LibVEX_patchProfCtr. */
+ if (addProfInc) {
+ addInstr(env, PPCInstr_ProfInc());
+ }
+
/* Ok, finally we can iterate over the statements. */
for (i = 0; i < bb->stmts_used; i++)
- if (bb->stmts[i])
- iselStmt(env,bb->stmts[i]);
+ iselStmt(env, bb->stmts[i]);
- iselNext(env,bb->next,bb->jumpkind);
+ iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
/* record the number of vregs we used. */
env->code->n_vregs = env->vreg_ctr;
Modified: branches/TCHAIN/priv/guest_ppc_toIR.c (+77 -68)
===================================================================
--- branches/TCHAIN/priv/guest_ppc_toIR.c 2012-04-19 15:23:48 +01:00 (rev 2288)
+++ branches/TCHAIN/priv/guest_ppc_toIR.c 2012-04-20 01:13:28 +01:00 (rev 2289)
@@ -1500,23 +1500,23 @@
if (mode64) {
vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I64);
stmt(
- IRStmt_Exit3(
+ IRStmt_Exit(
binop(Iop_CmpNE64,
binop(Iop_And64, mkexpr(addr), mkU64(align-1)),
mkU64(0)),
Ijk_SigBUS,
- IRConst_U64( guest_CIA_curr_instr )
+ IRConst_U64( guest_CIA_curr_instr ), OFFB_CIA
)
);
} else {
vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I32);
stmt(
- IRStmt_Exit3(
+ IRStmt_Exit(
binop(Iop_CmpNE32,
binop(Iop_And32, mkexpr(addr), mkU32(align-1)),
mkU32(0)),
Ijk_SigBUS,
- IRConst_U32( guest_CIA_curr_instr )
+ IRConst_U32( guest_CIA_curr_instr ), OFFB_CIA
)
);
}
@@ -2690,10 +2690,10 @@
so that Valgrind's dispatcher sees the warning. */
putGST( PPC_GST_EMWARN, mkU32(ew) );
stmt(
- IRStmt_Exit3(
+ IRStmt_Exit(
binop(Iop_CmpNE32, mkU32(ew), mkU32(EmWarn_NONE)),
Ijk_EmWarn,
- mkSzConst( ty, nextInsnAddr()) ));
+ mkSzConst( ty, nextInsnAddr()), OFFB_CIA ));
}
/* Ignore all other writes */
@@ -4975,9 +4975,9 @@
for (i = 0; i < maxBytes; i++) {
/* if (nBytes < (i+1)) goto NIA; */
- stmt( IRStmt_Exit3( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)),
+ stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)),
Ijk_Boring,
- mkSzConst( ty, nextInsnAddr()) ));
+ mkSzConst( ty, nextInsnAddr()), OFFB_CIA ));
/* when crossing into a new dest register, set it to zero. */
if ((i % 4) == 0) {
rD++; if (rD == 32) rD = 0;
@@ -5026,9 +5026,9 @@
for (i = 0; i < maxBytes; i++) {
/* if (nBytes < (i+1)) goto NIA; */
- stmt( IRStmt_Exit3( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)),
+ stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)),
Ijk_Boring,
- mkSzConst( ty, nextInsnAddr() ) ));
+ mkSzConst( ty, nextInsnAddr() ), OFFB_CIA ));
/* check for crossing into a new src register. */
if ((i % 4) == 0) {
rS++; if (rS == 32) rS = 0;
@@ -5250,6 +5250,7 @@
/* The default what-next. Individual cases can override it. */
dres->whatNext = Dis_StopHere;
+ vassert(dres->jk_StopHere == Ijk_INVALID);
switch (opc1) {
case 0x12: // b (Branch, PPC32 p360)
@@ -5282,8 +5283,8 @@
dres->whatNext = Dis_ResteerU;
dres->continueAt = tgt;
} else {
- irsb->jumpkind = flag_LK ? Ijk_Call : Ijk_Boring;
- irsb->next = mkSzImm(ty, tgt);
+ dres->jk_StopHere = flag_LK ? Ijk_Call : Ijk_Boring; ;
+ putGST( PPC_GST_CIA, mkSzImm(ty, tgt) );
}
break;
@@ -5301,7 +5302,7 @@
cond_ok is either zero or nonzero, since that's the cheapest
way to compute it. Anding them together gives a value which
is either zero or non zero and so that's what we must test
- for in the IRStmt_Exit3. */
+ for in the IRStmt_Exit. */
assign( ctr_ok, branch_ctr_ok( BO ) );
assign( cond_ok, branch_cond_ok( BO, BI ) );
assign( do_branch,
@@ -5316,13 +5317,13 @@
if (flag_LK)
putGST( PPC_GST_LR, e_nia );
- stmt( IRStmt_Exit3(
+ stmt( IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(do_branch), mkU32(0)),
flag_LK ? Ijk_Call : Ijk_Boring,
- mkSzConst(ty, tgt) ) );
-
- irsb->jumpkind = Ijk_Boring;
- irsb->next = e_nia;
+ mkSzConst(ty, tgt), OFFB_CIA ) );
+
+ dres->jk_StopHere = Ijk_Boring;
+ putGST( PPC_GST_CIA, e_nia );
break;
case 0x13:
@@ -5351,18 +5352,18 @@
if (flag_LK)
putGST( PPC_GST_LR, e_nia );
- stmt( IRStmt_Exit3(
+ stmt( IRStmt_Exit(
binop(Iop_CmpEQ32, mkexpr(cond_ok), mkU32(0)),
Ijk_Boring,
- c_nia ));
+ c_nia, OFFB_CIA ));
if (flag_LK && vbi->guest_ppc_zap_RZ_at_bl) {
make_redzone_AbiHint( vbi, lr_old,
"b-ctr-l (indirect call)" );
}
- irsb->jumpkind = flag_LK ? Ijk_Call : Ijk_Boring;
- irsb->next = mkexpr(lr_old);
+ dres->jk_StopHere = flag_LK ? Ijk_Call : Ijk_Boring;;
+ putGST( PPC_GST_CIA, mkexpr(lr_old) );
break;
case 0x010: { // bclr (Branch Cond. to Link Register, PPC32 p365)
@@ -5391,10 +5392,10 @@
if (flag_LK)
putGST( PPC_GST_LR, e_nia );
- stmt( IRStmt_Exit3(
+ stmt( IRStmt_Exit(
binop(Iop_CmpEQ32, mkexpr(do_branch), mkU32(0)),
Ijk_Boring,
- c_nia ));
+ c_nia, OFFB_CIA ));
if (vanilla_return && vbi->guest_ppc_zap_RZ_at_blr) {
make_redzone_AbiHint( vbi, lr_old,
@@ -5404,8 +5405,8 @@
/* blrl is pretty strange; it's like a return that sets the
return address of its caller to the insn following this
one. Mark it as a return. */
- irsb->jumpkind = Ijk_Ret; /* was flag_LK ? Ijk_Call : Ijk_Ret; */
- irsb->next = mkexpr(lr_old);
+ dres->jk_StopHere = Ijk_Ret; /* was flag_LK ? Ijk_Call : Ijk_Ret; */
+ putGST( PPC_GST_CIA, mkexpr(lr_old) );
break;
}
default:
@@ -5558,10 +5559,11 @@
if ((TO & b11100) == b11100 || (TO & b00111) == b00111) {
/* Unconditional trap. Just do the exit without
testing the arguments. */
- stmt( IRStmt_Exit3(
+ stmt( IRStmt_Exit(
binop(opCMPEQ, const0, const0),
Ijk_SigTRAP,
- mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia)
+ mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia),
+ OFFB_CIA
));
return True; /* unconditional trap */
}
@@ -5601,10 +5603,11 @@
tmp = binop(opAND, binop(opCMPORDU, argLe, argRe), const4);
cond = binop(opOR, tmp, cond);
}
- stmt( IRStmt_Exit3(
+ stmt( IRStmt_Exit(
binop(opCMPNE, cond, const0),
Ijk_SigTRAP,
- mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia)
+ mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia),
+ OFFB_CIA
));
return False; /* not an unconditional trap */
}
@@ -5652,9 +5655,9 @@
if (uncond) {
/* If the trap shows signs of being unconditional, don't
continue decoding past it. */
- irsb->next = mkSzImm( ty, nextInsnAddr() );
- irsb->jumpkind = Ijk_Boring;
- dres->whatNext = Dis_StopHere;
+ putGST( PPC_GST_CIA, mkSzImm( ty, nextInsnAddr() ));
+ dres->jk_StopHere = Ijk_Boring;
+ dres->whatNext = Dis_StopHere;
}
return True;
@@ -5706,9 +5709,9 @@
if (uncond) {
/* If the trap shows signs of being unconditional, don't
continue decoding past it. */
- irsb->next = mkSzImm( ty, nextInsnAddr() );
- irsb->jumpkind = Ijk_Boring;
- dres->whatNext = Dis_StopHere;
+ putGST( PPC_GST_CIA, mkSzImm( ty, nextInsnAddr() ));
+ dres->jk_StopHere = Ijk_Boring;
+ dres->whatNext = Dis_StopHere;
}
return True;
@@ -5739,12 +5742,12 @@
/* It's important that all ArchRegs carry their up-to-date value
at this point. So we declare an end-of-block here, which
forces any TempRegs caching ArchRegs to be flushed. */
- irsb->next = abiinfo->guest_ppc_sc_continues_at_LR
- ? getGST( PPC_GST_LR )
- : mkSzImm( ty, nextInsnAddr() );
- irsb->jumpkind = Ijk_Sys_syscall;
+ putGST( PPC_GST_CIA, abiinfo->guest_ppc_sc_continues_at_LR
+ ? getGST( PPC_GST_LR )
+ : mkSzImm( ty, nextInsnAddr() ));
- dres->whatNext = Dis_StopHere;
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = Ijk_Sys_syscall;
return True;
}
@@ -6722,9 +6725,9 @@
/* be paranoid ... */
stmt( IRStmt_MBE(Imbe_Fence) );
- irsb->jumpkind = Ijk_TInval;
- irsb->next = mkSzImm(ty, nextInsnAddr());
- dres->whatNext = Dis_StopHere;
+ putGST( PPC_GST_CIA, mkSzImm(ty, nextInsnAddr()));
+ dres->jk_StopHere = Ijk_TInval;
+ dres->whatNext = Dis_StopHere;
break;
}
@@ -13572,7 +13575,6 @@
static
DisResult disInstr_PPC_WRK (
- Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
@@ -13613,9 +13615,10 @@
delta = (Long)mkSzAddr(ty, (ULong)delta64);
/* Set result defaults. */
- dres.whatNext = Dis_Continue;
- dres.len = 0;
- dres.continueAt = 0;
+ dres.whatNext = Dis_Continue;
+ dres.len = 0;
+ dres.continueAt = 0;
+ dres.jk_StopHere = Ijk_INVALID;
/* At least this is simple on PPC32: insns are all 4 bytes long, and
4-aligned. So just fish the whole thing out of memory right now
@@ -13626,10 +13629,6 @@
DIP("\t0x%llx: ", (ULong)guest_CIA_curr_instr);
- /* We may be asked to update the guest CIA before going further. */
- if (put_IP)
- putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr) );
-
/* Spot "Special" instructions (see comment at top of file). */
{
UChar* code = (UChar*)(guest_code + delta);
@@ -13658,9 +13657,9 @@
/* %R3 = client_request ( %R4 ) */
DIP("r3 = client_request ( %%r4 )\n");
delta += 20;
- irsb->next = mkSzImm( ty, guest_CIA_bbstart + delta );
- irsb->jumpkind = Ijk_ClientReq;
- dres.whatNext = Dis_StopHere;
+ putGST( PPC_GST_CIA, mkSzImm( ty, guest_CIA_bbstart + delta ));
+ dres.jk_StopHere = Ijk_ClientReq;
+ dres.whatNext = Dis_StopHere;
goto decode_success;
}
else
@@ -13678,9 +13677,9 @@
DIP("branch-and-link-to-noredir r11\n");
delta += 20;
putGST( PPC_GST_LR, mkSzImm(ty, guest_CIA_bbstart + (Long)delta) );
- irsb->next = getIReg(11);
- irsb->jumpkind = Ijk_NoRedir;
- dres.whatNext = Dis_StopHere;
+ putGST( PPC_GST_CIA, getIReg(11));
+ dres.jk_StopHere = Ijk_NoRedir;
+ dres.whatNext = Dis_StopHere;
goto decode_success;
}
else
@@ -14252,9 +14251,9 @@
Bool ok = dis_int_ldst_str( theInstr, &stopHere );
if (!ok) goto decode_failure;
if (stopHere) {
- irsb->next = mkSzImm(ty, nextInsnAddr());
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ putGST( PPC_GST_CIA, mkSzImm(ty, nextInsnAddr()) );
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
}
goto decode_success;
}
@@ -14601,16 +14600,28 @@
insn, but nevertheless be paranoid and update it again right
now. */
putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr) );
- irsb->next = mkSzImm(ty, guest_CIA_curr_instr);
- irsb->jumpkind = Ijk_NoDecode;
- dres.whatNext = Dis_StopHere;
- dres.len = 0;
+ dres.whatNext = Dis_StopHere;
+ dres.jk_StopHere = Ijk_NoDecode;
+ dres.len = 0;
return dres;
} /* switch (opc) for the main (primary) opcode switch. */
decode_success:
/* All decode successes end up here. */
+ switch (dres.whatNext) {
+ case Dis_Continue:
+ putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr + 4));
+ break;
+ case Dis_ResteerU:
+ case Dis_ResteerC:
+ putGST( PPC_GST_CIA, mkSzImm(ty, dres.continueAt));
+ break;
+ case Dis_StopHere:
+ break;
+ default:
+ vassert(0);
+ }
DIP("\n");
if (dres.len == 0) {
@@ -14633,7 +14644,6 @@
is located in host memory at &guest_code[delta]. */
DisResult disInstr_PPC ( IRSB* irsb_IN,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
@@ -14677,8 +14687,7 @@
guest_CIA_curr_instr = mkSzAddr(ty, guest_IP);
guest_CIA_bbstart = mkSzAddr(ty, guest_IP - delta);
- dres = disInstr_PPC_WRK ( put_IP,
- resteerOkFn, resteerCisOk, callback_opaque,
+ dres = disInstr_PPC_WRK ( resteerOkFn, resteerCisOk, callback_opaque,
delta, archinfo, abiinfo );
return dres;
Modified: branches/TCHAIN/priv/host_ppc_defs.h (+85 -14)
===================================================================
--- branches/TCHAIN/priv/host_ppc_defs.h 2012-04-19 15:23:48 +01:00 (rev 2288)
+++ branches/TCHAIN/priv/host_ppc_defs.h 2012-04-20 01:13:28 +01:00 (rev 2289)
@@ -451,7 +451,9 @@
Pin_MulL, /* widening multiply */
Pin_Div, /* div */
Pin_Call, /* call to address in register */
- Pin_Goto, /* conditional/unconditional jmp to dst */
+ Pin_XDirect, /* direct transfer to GA */
+ Pin_XIndir, /* indirect transfer to GA */
+ Pin_XAssisted, /* assisted transfer to GA */
Pin_CMov, /* conditional move */
Pin_Load, /* zero-extending load a 8|16|32|64 bit value from mem */
Pin_LoadL, /* load-linked (lwarx/ldarx) 32|64 bit value from mem */
@@ -494,7 +496,9 @@
Pin_Dfp64Unary, /* DFP64 unary op */
Pin_Dfp128nary, /* DFP128 unary op */
Pin_Dfp64Binary, /* DFP64 binary op */
- Pin_Dfp128Binary /* DFP128 binary op */
+ Pin_Dfp128Binary, /* DFP128 binary op */
+ Pin_EvCheck, /* Event check */
+ Pin_ProfInc /* 64-bit profile counter increment */
}
PPCInstrTag;
@@ -586,13 +590,30 @@
Addr64 target;
UInt argiregs;
} Call;
- /* Pseudo-insn. Goto dst, on given condition (which could be
- Pct_ALWAYS). */
+ /* Update the guest CIA value, then exit requesting to chain
+ to it. May be conditional. Use of Addr64 in order to cope
+ with 64-bit hosts. */
struct {
+ Addr64 dstGA; /* next guest address */
+ PPCAMode* amCIA; /* amode in guest state for CIA */
+ PPCCondCode cond; /* can be ALWAYS */
+ Bool toFastEP; /* chain to the slow or fast point? */
+ } XDirect;
+ /* Boring transfer to a guest address not known at JIT time.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ PPCAMode* amCIA;
+ PPCCondCode cond; /* can be ALWAYS */
+ } XIndir;
+ /* Assisted transfer to a guest address, most general case.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ PPCAMode* amCIA;
+ PPCCondCode cond; /* can be ALWAYS */
IRJumpKind jk;
- PPCCondCode cond;
- PPCRI* dst;
- } Goto;
+ } XAssisted;
/* Mov src to dst on the given condition, which may not
be the bogus Pct_ALWAYS. */
struct {
@@ -820,6 +841,17 @@
HReg srcR_hi;
HReg srcR_lo;
} Dfp128Binary;
+
+ struct {
+ PPCAMode* amCounter;
+ PPCAMode* amFailAddr;
+ } EvCheck;
+ struct {
+ /* No fields. The address of the counter to inc is
+ installed later, post-translation, by patching it in,
+ as it is not known at translation time. */
+ } ProfInc;
+
} Pin;
}
PPCInstr;
@@ -834,7 +866,12 @@
extern PPCInstr* PPCInstr_MulL ( Bool syned, Bool hi32, Bool sz32, HReg, HReg, HReg );
extern PPCInstr* PPCInstr_Div ( Bool extended, Bool syned, Bool sz32, HReg dst, HReg srcL, HReg srcR );
extern PPCInstr* PPCInstr_Call ( PPCCondCode, Addr64, UInt );
-extern PPCInstr* PPCInstr_Goto ( IRJumpKind, PPCCondCode cond, PPCRI* dst );
+extern PPCInstr* PPCInstr_XDirect ( Addr64 dstGA, PPCAMode* amCIA,
+ PPCCondCode cond, Bool toFastEP );
+extern PPCInstr* PPCInstr_XIndir ( HReg dstGA, PPCAMode* amCIA,
+ PPCCondCode cond );
+extern PPCInstr* PPCInstr_XAssisted ( HReg dstGA, PPCAMode* amCIA,
+ PPCCondCode cond, IRJumpKind jk );
extern PPCInstr* PPCInstr_CMov ( PPCCondCode, HReg dst, PPCRI* src );
extern PPCInstr* PPCInstr_Load ( UChar sz,
HReg dst, PPCAMode* src, Bool mode64 );
@@ -883,6 +920,9 @@
HReg srcR );
extern PPCInstr* PPCInstr_Dfp128Binary( PPCFpOp op, HReg dst_hi, HReg dst_lo,
HReg srcR_hi, HReg srcR_lo );
+extern PPCInstr* PPCInstr_EvCheck ( PPCAMode* amCounter,
+ PPCAMode* amFailAddr );
+extern PPCInstr* PPCInstr_ProfInc ( void );
extern void ppPPCInstr(PPCInstr*, Bool mode64);
@@ -892,10 +932,13 @@
extern void getRegUsage_PPCInstr ( HRegUsage*, PPCInstr*, Bool mode64 );
extern void mapRegs_PPCInstr ( HRegRemap*, PPCInstr* , Bool mode64);
extern Bool isMove_PPCInstr ( PPCInstr*, HReg*, HReg* );
-extern Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr*,
+extern Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, PPCInstr* i,
Bool mode64,
- void* dispatch_unassisted,
- void* dispatch_assisted );
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted );
extern void genSpill_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offsetB, Bool mode64 );
@@ -903,10 +946,38 @@
HReg rreg, Int offsetB, Bool mode64 );
extern void getAllocableRegs_PPC ( Int*, HReg**, Bool mode64 );
-extern HInstrArray* iselSB_PPC ( IRSB*, VexArch,
- VexArchInfo*,
- VexAbiInfo* );
+extern HInstrArray* iselSB_PPC ( IRSB*,
+ VexArch,
+ VexArchInfo*,
+ VexAbiInfo*,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga );
+/* How big is an event check? This is kind of a kludge because it
+ depends on the offsets of host_EvC_FAILADDR and
+ host_EvC_COUNTER. */
+extern Int evCheckSzB_PPC ( void );
+
+/* Perform a chaining and unchaining of an XDirect jump. */
+extern VexInvalRange chainXDirect_PPC ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to,
+ Bool mode64 );
+
+extern VexInvalRange unchainXDirect_PPC ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me,
+ Bool mode64 );
+
+/* Patch the counter location into an existing ProfInc point. */
+extern VexInvalRange patchProfInc_PPC ( void* place_to_patch,
+ ULong* location_of_counter,
+ Bool mode64 );
+
+
#endif /* ndef __VEX_HOST_PPC_DEFS_H */
/*---------------------------------------------------------------*/
Modified: branches/TCHAIN/priv/host_ppc_defs.c (+761 -104)
===================================================================
--- branches/TCHAIN/priv/host_ppc_defs.c 2012-04-19 15:23:48 +01:00 (rev 2288)
+++ branches/TCHAIN/priv/host_ppc_defs.c 2012-04-20 01:13:28 +01:00 (rev 2289)
@@ -838,15 +838,35 @@
vassert(0 == (argiregs & ~mask));
return i;
}
-PPCInstr* PPCInstr_Goto ( IRJumpKind jk,
- PPCCondCode cond, PPCRI* dst ) {
- PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
- i->tag = Pin_Goto;
- i->Pin.Goto.cond = cond;
- i->Pin.Goto.dst = dst;
- i->Pin.Goto.jk = jk;
+PPCInstr* PPCInstr_XDirect ( Addr64 dstGA, PPCAMode* amCIA,
+ PPCCondCode cond, Bool toFastEP ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_XDirect;
+ i->Pin.XDirect.dstGA = dstGA;
+ i->Pin.XDirect.amCIA = amCIA;
+ i->Pin.XDirect.cond = cond;
+ i->Pin.XDirect.toFastEP = toFastEP;
return i;
}
+PPCInstr* PPCInstr_XIndir ( HReg dstGA, PPCAMode* amCIA,
+ PPCCondCode cond ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_XIndir;
+ i->Pin.XIndir.dstGA = dstGA;
+ i->Pin.XIndir.amCIA = amCIA;
+ i->Pin.XIndir.cond = cond;
+ return i;
+}
+PPCInstr* PPCInstr_XAssisted ( HReg dstGA, PPCAMode* amCIA,
+ PPCCondCode cond, IRJumpKind jk ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_XAssisted;
+ i->Pin.XAssisted.dstGA = dstGA;
+ i->Pin.XAssisted.amCIA = amCIA;
+ i->Pin.XAssisted.cond = cond;
+ i->Pin.XAssisted.jk = jk;
+ return i;
+}
PPCInstr* PPCInstr_CMov ( PPCCondCode cond,
HReg dst, PPCRI* src ) {
PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
@@ -1008,6 +1028,21 @@
return i;
}
+PPCInstr* PPCInstr_EvCheck ( PPCAMode* amCounter,
+ PPCAMode* amFailAddr ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_EvCheck;
+ i->Pin.EvCheck.amCounter = amCounter;
+ i->Pin.EvCheck.amFailAddr = amFailAddr;
+ return i;
+}
+PPCInstr* PPCInstr_ProfInc ( void ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_ProfInc;
+ return i;
+}
+
+
/*
Valid combo | fromI | int32 | syned | flt64 |
--------------------------------------------
@@ -1371,27 +1406,54 @@
vex_printf("] }");
break;
}
- case Pin_Goto:
- vex_printf("goto: ");
- if (i->Pin.Goto.cond.test != Pct_ALWAYS) {
- vex_printf("if (%s) ", showPPCCondCode(i->Pin.Goto.cond));
+ case Pin_XDirect:
+ vex_printf("(xDirect) ");
+ vex_printf("if (%s) { ",
+ showPPCCondCode(i->Pin.XDirect.cond));
+ if (mode64) {
+ vex_printf("imm64 r30,0x%llx; ", i->Pin.XDirect.dstGA);
+ vex_printf("std r30,");
+ } else {
+ vex_printf("imm32 r30,0x%llx; ", i->Pin.XDirect.dstGA);
+ vex_printf("stw r30,");
}
- vex_printf("{ ");
- if (i->Pin.Goto.jk != Ijk_Boring
- && i->Pin.Goto.jk != Ijk_Call
- && i->Pin.Goto.jk != Ijk_Ret) {
- vex_printf("li %%r31,$");
- ppIRJumpKind(i->Pin.Goto.jk);
- vex_printf(" ; ");
- }
- if (i->Pin.Goto.dst->tag == Pri_Imm) {
- ppLoadImm(hregPPC_GPR3(mode64), i->Pin.Goto.dst->Pri.Imm,
- mode64);
+ ppPPCAMode(i->Pin.XDirect.amCIA);
+ vex_printf("; ");
+ if (mode64) {
+ vex_printf("imm64-fixed5 r30,$disp_cp_chain_me_to_%sEP; ",
+ i->Pin.XDirect.toFastEP ? "fast" : "slow");
} else {
- ppMovReg(hregPPC_GPR3(mode64), i->Pin.Goto.dst->Pri.Reg);
+ vex_printf("imm32-fixed2 r30,$disp_cp_chain_me_to_%sEP; ",
+ i->Pin.XDirect.toFastEP ? "fast" : "slow");
}
- vex_printf(" ; blr }");
+ vex_printf("mtctr r30; bctrl }");
return;
+ case Pin_XIndir:
+ vex_printf("(xIndir) ");
+ vex_printf("if (%s) { ",
+ showPPCCondCode(i->Pin.XIndir.cond));
+ vex_printf("%s ", mode64 ? "std" : "stw");
+ ppHRegPPC(i->Pin.XIndir.dstGA);
+ vex_printf(",");
+ ppPPCAMode(i->Pin.XIndir.amCIA);
+ vex_printf("; ");
+ vex_printf("imm%s r30,$disp_cp_xindir; ", mode64 ? "64" : "32");
+ vex_printf("mtctr r30; bctr }");
+ return;
+ case Pin_XAssisted:
+ vex_printf("(xAssisted) ");
+ vex_printf("if (%s) { ",
+ showPPCCondCode(i->Pin.XAssisted.cond));
+ vex_printf("%s ", mode64 ? "std" : "stw");
+ ppHRegPPC(i->Pin.XAssisted.dstGA);
+ vex_printf(",");
+ ppPPCAMode(i->Pin.XAssisted.amCIA);
+ vex_printf("; ");
+ vex_printf("li r31,$IRJumpKind_to_TRCVAL(%d); ",
+ (Int)i->Pin.XAssisted.jk);
+ vex_printf("imm%s r30,$disp_cp_xindir; ", mode64 ? "64" : "32");
+ vex_printf("mtctr r30; bctr }");
+ return;
case Pin_CMov:
vex_printf("cmov (%s) ", showPPCCondCode(i->Pin.CMov.cond));
ppHRegPPC(i->Pin.CMov.dst);
@@ -1773,6 +1835,30 @@
ppHRegPPC(i->Pin.Dfp128Binary.srcR_hi);
return;
+ case Pin_EvCheck:
+ /* Note that the counter dec is 32 bit even in 64-bit mode. */
+ vex_printf("(evCheck) ");
+ vex_printf("lwz r30,");
+ ppPPCAMode(i->Pin.EvCheck.amCounter);
+ vex_printf("; addic. r30,r30,-1; ");
+ vex_printf("stw r30,");
+ ppPPCAMode(i->Pin.EvCheck.amCounter);
+ vex_printf("; bge nofail; lwz r30,");
+ ppPPCAMode(i->Pin.EvCheck.amFailAddr);
+ vex_printf("; mtctr r30; bctr; nofail:");
+ return;
+
+ case Pin_ProfInc:
+ if (mode64) {
+ vex_printf("(profInc) imm64 r30,$NotKnownYet;");
+ vex_printf("ld r29,(r30); addi r29,r29,1; std r29,(r30)");
+ } else {
+ vex_printf("(profInc) imm32 r30,$NotKnownYet;");
+ vex_printf("lwz r29,4(r30); addic. r29,r29,1; stw r29,4(r30)");
+ vex_printf("lwz r29,0(r30); addze r29,r29; stw r29,0(r30)");
+ }
+ break;
+
default:
vex_printf("\nppPPCInstr: No such tag(%d)\n", (Int)i->tag);
vpanic("ppPPCInstr");
@@ -1871,18 +1957,22 @@
and no other, as a destination temporary. */
return;
}
- case Pin_Goto:
- addRegUsage_PPCRI(u, i->Pin.Goto.dst);
- /* GPR3 holds destination address from Pin_Goto */
- addHRegUse(u, HRmWrite, hregPPC_GPR3(mode64));
- if (i->Pin.Goto.jk != Ijk_Boring
- && i->Pin.Goto.jk != Ijk_Call
- && i->Pin.Goto.jk != Ijk_Ret)
- /* note, this is irrelevant since the guest state pointer
- register is not actually available to the allocator.
- But still .. */
- addHRegUse(u, HRmWrite, GuestStatePtr(mode64));
+ /* XDirect/XIndir/XAssisted are also a bit subtle. They
+ conditionally exit the block. Hence we only need to list (1)
+ the registers that they read, and (2) the registers that they
+ write in the case where the block is not exited. (2) is empty,
+ hence only (1) is relevant here. */
+ case Pin_XDirect:
+ addRegUsage_PPCAMode(u, i->Pin.XDirect.amCIA);
return;
+ case Pin_XIndir:
+ addHRegUse(u, HRmRead, i->Pin.XIndir.dstGA);
+ addRegUsage_PPCAMode(u, i->Pin.XIndir.amCIA);
+ return;
+ case Pin_XAssisted:
+ addHRegUse(u, HRmRead, i->Pin.XAssisted.dstGA);
+ addRegUsage_PPCAMode(u, i->Pin.XAssisted.amCIA);
+ return;
case Pin_CMov:
addRegUsage_PPCRI(u, i->Pin.CMov.src);
addHRegUse(u, HRmWrite, i->Pin.CMov.dst);
@@ -2055,7 +2145,18 @@
addHRegUse(u, HRmRead, i->Pin.Dfp128Binary.srcR_hi);
addHRegUse(u, HRmRead, i->Pin.Dfp128Binary.srcR_lo);
return;
-
+ case Pin_EvCheck:
+ /* We expect both amodes only to mention the GSP (r31), so this
+ is in fact pointless, since GSP isn't allocatable, but
+ anyway.. */
+ addRegUsage_PPCAMode(u, i->Pin.EvCheck.amCounter);
+ addRegUsage_PPCAMode(u, i->Pin.EvCheck.amFailAddr);
+ addHRegUse(u, HRmWrite, hregPPC_GPR30(mode64)); /* also unavail to RA */
+ return;
+ case Pin_ProfInc:
+ addHRegUse(u, HRmWrite, hregPPC_GPR29(mode64));
+ addHRegUse(u, HRmWrite, hregPPC_GPR30(mode64));
+ return;
default:
ppPPCInstr(i, mode64);
vpanic("getRegUsage_PPCInstr");
@@ -2109,9 +2210,17 @@
return;
case Pin_Call:
return;
- case Pin_Goto:
- mapRegs_PPCRI(m, i->Pin.Goto.dst);
+ case Pin_XDirect:
+ mapRegs_PPCAMode(m, i->Pin.XDirect.amCIA);
return;
+ case Pin_XIndir:
+ mapReg(m, &i->Pin.XIndir.dstGA);
+ mapRegs_PPCAMode(m, i->Pin.XIndir.amCIA);
+ return;
+ case Pin_XAssisted:
+ mapReg(m, &i->Pin.XAssisted.dstGA);
+ mapRegs_PPCAMode(m, i->Pin.XAssisted.amCIA);
+ return;
case Pin_CMov:
mapRegs_PPCRI(m, i->Pin.CMov.src);
mapReg(m, &i->Pin.CMov.dst);
@@ -2266,7 +2375,16 @@
mapReg(m, &i->Pin.Dfp128Binary.srcR_hi);
mapReg(m, &i->Pin.Dfp128Binary.srcR_lo);
return;
-
+ case Pin_EvCheck:
+ /* We expect both amodes only to mention the GSP (r31), so this
+ is in fact pointless, since GSP isn't allocatable, but
+ anyway.. */
+ mapRegs_PPCAMode(m, i->Pin.EvCheck.amCounter);
+ mapRegs_PPCAMode(m, i->Pin.EvCheck.amFailAddr);
+ return;
+ case Pin_ProfInc:
+ /* hardwires r29 and r30 -- nothing to modify. */
+ return;
default:
ppPPCInstr(i, mode64);
vpanic("mapRegs_PPCInstr");
@@ -2400,7 +2518,7 @@
return n;
}
-/* Emit 32bit instruction big-endianly */
+/* Emit an instruction big-endianly */
static UChar* emit32 ( UChar* p, UInt w32 )
{
*p++ = toUChar((w32 >> 24) & 0x000000FF);
@@ -2410,6 +2528,17 @@
return p;
}
+/* Fetch an instruction big-endianly */
+static UInt fetch32 ( UChar* p )
+{
+ UInt w32 = 0;
+ w32 |= ((0xFF & (UInt)p[0]) << 24);
+ w32 |= ((0xFF & (UInt)p[1]) << 16);
+ w32 |= ((0xFF & (UInt)p[2]) << 8);
+ w32 |= ((0xFF & (UInt)p[3]) << 0);
+ return w32;
+}
+
/* The following mkForm[...] functions refer to ppc instruction forms
as per PPC32 p576
*/
@@ -2693,6 +2822,210 @@
return p;
}
+/* A simplified version of mkLoadImm that always generates 2 or 5
+ instructions (32 or 64 bits respectively) even if it could generate
+ fewer. This is needed for generating fixed sized patchable
+ sequences. */
+static UChar* mkLoadImm_EXACTLY2or5 ( UChar* p,
+ UInt r_dst, ULong imm, Bool mode64 )
+{
+ vassert(r_dst < 0x20);
+
+ if (!mode64) {
+ /* In 32-bit mode, make sure the top 32 bits of imm are a sign
+ extension of the bottom 32 bits. (Probably unnecessary.) */
+ UInt u32 = (UInt)imm;
+ Int s32 = (Int)u32;
+ Long s64 = (Long)s32;
+ imm = (ULong)s64;
+ }
+
+ if (!mode64) {
+ // addis r_dst,r0,(imm>>16) => lis r_dst, (imm>>16)
+ p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF);
+ // ori r_dst, r_dst, (imm & 0xFFFF)
+ p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+
+ } else {
+ // full 64bit immediate load: 5 (five!) insns.
+
+ // load high word
+ // lis r_dst, (imm>>48) & 0xFFFF
+ p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF);
+
+ // ori r_dst, r_dst, (imm>>32) & 0xFFFF
+ p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF);
+
+ // shift r_dst low word to high word => rldicr
+ p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1);
+
+ // load low word
+ // oris r_dst, r_dst, (imm>>16) & 0xFFFF
+ p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF);
+
+ // ori r_dst, r_dst, (imm) & 0xFFFF
+ p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+ }
+ return p;
+}
+
+/* Checks whether the sequence of bytes at p was indeed created
+ by mkLoadImm_EXACTLY2or5 with the given parameters. */
+static Bool isLoadImm_EXACTLY2or5 ( UChar* p_to_check,
+ UInt r_dst, ULong imm, Bool mode64 )
+{
+ vassert(r_dst < 0x20);
+
+ if (!mode64) {
+ /* In 32-bit mode, make sure the top 32 bits of imm are a sign
+ extension of the bottom 32 bits. (Probably unnecessary.) */
+ UInt u32 = (UInt)imm;
+ Int s32 = (Int)u32;
+ Long s64 = (Long)s32;
+ imm = (ULong)s64;
+ }
+
+ if (!mode64) {
+ UInt expect[2] = { 0, 0 };
+ UChar* p = (UChar*)&expect[0];
+ // addis r_dst,r0,(imm>>16) => lis r_dst, (imm>>16)
+ p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF);
+ // ori r_dst, r_dst, (imm & 0xFFFF)
+ p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+ vassert(p == (UChar*)&expect[2]);
+
+ return fetch32(p_to_check + 0) == expect[0]
+ && fetch32(p_to_check + 4) == expect[1];
+
+ } else {
+ UInt expect[5] = { 0, 0, 0, 0, 0 };
+ UChar* p = (UChar*)&expect[0];
+ // full 64bit immediate load: 5 (five!) insns.
+
+ // load high word
+ // lis r_dst, (imm>>48) & 0xFFFF
+ p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF);
+
+ // ori r_dst, r_dst, (imm>>32) & 0xFFFF
+ p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF);
+
+ // shift r_dst low word to high word => rldicr
+ p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1);
+
+ // load low word
+ // oris r_dst, r_dst, (imm>>16) & 0xFFFF
+ p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF);
+
+ // ori r_dst, r_dst, (imm) & 0xFFFF
+ p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+
+ vassert(p == (UChar*)&expect[5]);
+
+ return fetch32(p_to_check + 0) == expect[0]
+ && fetch32(p_to_check + 4) == expect[1]
+ && fetch32(p_to_check + 8) == expect[2]
+ && fetch32(p_to_check + 12) == expect[3]
+ && fetch32(p_to_check + 16) == expect[4];
+ }
+}
+
+
+/* Generate a machine-word sized load or store. Simplified version of
+ the Pin_Load and Pin_Store cases below. */
+static UChar* do_load_or_store_machine_word (
+ UChar* p, Bool isLoad,
+ UInt reg, PPCAMode* am, Bool mode64 )
+{
+ if (isLoad) {
+ UInt opc1, sz = mode64 ? 8 : 4;
+ switch (am->tag) {
+ case Pam_IR:
+ if (mode64) {
+ vassert(0 == (am->Pam.IR.index & 3));
+ }
+ switch (sz) {
+ case 4: opc1 = 32; vassert(!mode64); break;
+ case 8: opc1 = 58; vassert(mode64); break;
+ default: vassert(0);
+ }
+ p = doAMode_IR(p, opc1, reg, am, mode64);
+ break;
+ case Pam_RR:
+ /* we could handle this case, but we don't expect to ever
+ need to. */
+ vassert(0);
+ default:
+ vassert(0);
+ }
+ } else /*store*/ {
+ UInt opc1, sz = mode64 ? 8 : 4;
+ switch (am->tag) {
+ case Pam_IR:
+ if (mode64) {
+ vassert(0 == (am->Pam.IR.index & 3));
+ }
+ switch (sz) {
+ case 4: opc1 = 36; vassert(!mode64); break;
+ case 8: opc1 = 62; vassert(mode64); break;
+ default: vassert(0);
+ }
+ p = doAMode_IR(p, opc1, reg, am, mode64);
+ break;
+ case Pam_RR:
+ /* we could handle this case, but we don't expect to ever
+ need to. */
+ vassert(0);
+ default:
+ vassert(0);
+ }
+ }
+ return p;
+}
+
+/* Generate a 32-bit sized load or store. Simplified version of
+ do_load_or_store_machine_word above. */
+static UChar* do_load_or_store_word32 (
+ UChar* p, Bool isLoad,
+ UInt reg, PPCAMode* am, Bool mode64 )
+{
+ if (isLoad) {
+ UInt opc1;
+ switch (am->tag) {
+ case Pam_IR:
+ if (mode64) {
+ vassert(0 == (am->Pam.IR.index & 3));
+ }
+ opc1 = 32;
+ p = doAMode_IR(p, opc1, reg, am, mode64);
+ break;
+ case Pam_RR:
+ /* we could handle this case, but we don't expect to ever
+ need to. */
+ vassert(0);
+ default:
+ vassert(0);
+ }
+ } else /*store*/ {
+ UInt opc1;
+ switch (am->tag) {
+ case Pam_IR:
+ if (mode64) {
+ vassert(0 == (am->Pam.IR.index & 3));
+ }
+ opc1 = 36;
+ p = doAMode_IR(p, opc1, reg, am, mode64);
+ break;
+ case Pam_RR:
+ /* we could handle this case, but we don't expect to ever
+ need to. */
+ vassert(0);
+ default:
+ vassert(0);
+ }
+ }
+ return p;
+}
+
/* Move r_dst to r_src */
static UChar* mkMoveReg ( UChar* p, UInt r_dst, UInt r_src )
{
@@ -2753,18 +3086,19 @@
/* Emit an instruction into buf and return the number of bytes used.
Note that buf is not the insn's final place, and therefore it is
- imperative to emit position-independent code.
-
- Note, dispatch should always be NULL since ppc32/64 backends
- use a call-return scheme to get from the dispatcher to generated
- code and back.
+ imperative to emit position-independent code. If the emitted
+ instruction was a profiler inc, set *is_profInc to True, else leave
+ it unchanged.
*/
-Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i,
+Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, PPCInstr* i,
Bool mode64,
- void* dispatch_unassisted, void* dispatch_assisted )
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted )
{
UChar* p = &buf[0];
- UChar* ptmp = p;
vassert(nbuf >= 32);
if (0) {
@@ -3131,6 +3465,7 @@
getRegUsage_PPCInstr above, %r10 is used as an address temp */
/* jump over the following insns if condition does not hold */
+ UChar* ptmp = NULL;
if (cond.test != Pct_ALWAYS) {
/* jmp fwds if !condition */
/* don't know how many bytes to jump over yet...
@@ -3159,75 +3494,175 @@
goto done;
}
- case Pin_Goto: {
- UInt trc = 0;
- UChar r_ret = 3; /* Put target addr into %r3 */
- PPCCondCode cond = i->Pin.Goto.cond;
- UInt r_dst;
- ULong imm_dst;
+ case Pin_XDirect: {
+ /* NB: what goes on here has to be very closely coordinated
+ with the chainXDirect_PPC and unchainXDirect_PPC below. */
+ /* We're generating chain-me requests here, so we need to be
+ sure this is actually allowed -- no-redir translations
+ can't use chain-me's. Hence: */
+ vassert(disp_cp_chain_me_to_slowEP != NULL);
+ vassert(disp_cp_chain_me_to_fastEP != NULL);
- vassert(dispatch_unassisted == NULL);
- vassert(dispatch_assisted == NULL);
-
- /* First off, if this is conditional, create a conditional
- jump over the rest of it. */
- if (cond.test != Pct_ALWAYS) {
- /* jmp fwds if !condition */
- /* don't know how many bytes to jump over yet...
- make space for a jump instruction and fill in later. */
- ptmp = p; /* fill in this bit later */
+ /* First off, if this is conditional, create a conditional jump
+ over the rest of it. Or at least, leave a space for it that
+ we will shortly fill in. */
+ UChar* ptmp = NULL;
+ if (i->Pin.XDirect.cond.test != Pct_ALWAYS) {
+ vassert(i->Pin.XDirect.cond.flag != Pcf_NONE);
+ ptmp = p;
p += 4;
+ } else {
+ vassert(i->Pin.XDirect.cond.flag == Pcf_NONE);
}
- // cond succeeds...
-
- /* If a non-boring, set GuestStatePtr appropriately. */
- switch (i->Pin.Goto.jk) {
- case Ijk_ClientReq: trc = VEX_TRC_JMP_CLIENTREQ; break;
- case Ijk_Sys_syscall: trc = VEX_TRC_JMP_SYS_SYSCALL; break;
- case Ijk_Yield: trc = VEX_TRC_JMP_YIELD; break;
- case Ijk_EmWarn: trc = VEX_TRC_JMP_EMWARN; break;
- case Ijk_EmFail: trc = VEX_TRC_JMP_EMFAIL; break;
- case Ijk_MapFail: trc = VEX_TRC_JMP_MAPFAIL; break;
- case Ijk_NoDecode: trc = VEX_TRC_JMP_NODECODE; break;
- case Ijk_TInval: trc = VEX_TRC_JMP_TINVAL; break;
- case Ijk_NoRedir: trc = VEX_TRC_JMP_NOREDIR; break;
- case Ijk_SigTRAP: trc = VEX_TRC_JMP_SIGTRAP; break;
- case Ijk_SigBUS: trc = VEX_TRC_JMP_SIGBUS; break;
- case Ijk_Ret:
- case Ijk_Call:
- case Ijk_Boring:
- break;
- default:
- ppIRJumpKind(i->Pin.Goto.jk);
- vpanic("emit_PPCInstr.Pin_Goto: unknown jump kind");
+ /* Update the guest CIA. */
+ /* imm32/64 r30, dstGA */
+ if (!mode64) vassert(0 == (((ULong)i->Pin.XDirect.dstGA) >> 32));
+ p = mkLoadImm(p, /*r*/30, (ULong)i->Pin.XDirect.dstGA, mode64);
+ /* stw/std r30, amCIA */
+ p = do_load_or_store_machine_word(
+ p, False/*!isLoad*/,
+ /*r*/30, i->Pin.XDirect.amCIA, mode64
+ );
+
+ /* --- FIRST PATCHABLE BYTE follows --- */
+ /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
+ to) backs up the return address, so as to find the address of
+ the first patchable byte. So: don't change the number of
+ instructions (32-bit: 4, 64-bit: 7) below. */
+      /* imm32/64-fixed r30, VG_(disp_cp_chain_me_to_{slowEP,fastEP}) */
+ void* disp_cp_chain_me
+ = i->Pin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
+ : disp_cp_chain_me_to_slowEP;
+ p = mkLoadImm_EXACTLY2or5(
+ p, /*r*/30, Ptr_to_ULong(disp_cp_chain_me), mode64);
+ /* mtctr r30 */
+ p = mkFormXFX(p, /*r*/30, 9, 467);
+ /* bctrl */
+ p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 1);
+ /* --- END of PATCHABLE BYTES --- */
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Pin.XDirect.cond.test != Pct_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta >= 16 && delta <= 32 && 0 == (delta & 3));
+ /* bc !ct,cf,delta */
+ mkFormB(ptmp, invertCondTest(i->Pin.XDirect.cond.test),
+ i->Pin.XDirect.cond.flag, (delta>>2), 0, 0);
}
- if (trc !=0) {
- vassert(trc < 0x10000);
- /* addi r31,0,trc...
[truncated message content] |