You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
1
(1) |
2
(2) |
3
(2) |
4
(3) |
5
(1) |
|
6
(2) |
7
|
8
|
9
|
10
(1) |
11
|
12
|
|
13
|
14
(2) |
15
(27) |
16
(1) |
17
(4) |
18
(4) |
19
|
|
20
|
21
(1) |
22
(2) |
23
|
24
(2) |
25
|
26
(2) |
|
27
|
28
(22) |
29
(5) |
30
(3) |
31
(6) |
|
|
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:28:01
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=613905cabed0e418fe1aa7c69962af4acd897e1b commit 613905cabed0e418fe1aa7c69962af4acd897e1b Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 29 00:07:32 2017 +0200 Add some support for If-Then-Else into VEX register allocator v3. Stages 1-3 work correctly on and HInstrVec hierarchy. Stage 4 is missing merges after HInstrIfThenElse legs join. Diff: --- VEX/priv/host_generic_reg_alloc3.c | 930 +++++++++++++++++++++++++------------ 1 file changed, 638 insertions(+), 292 deletions(-) diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c index 81836b6..5b60a35 100644 --- a/VEX/priv/host_generic_reg_alloc3.c +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -41,21 +41,52 @@ #define INVALID_INSTRNO (-2) +#define INSTRNO_TOTAL toShort(ii_chunk + chunk->ii_total_start) + +#define MAX(x, y) ((x) > (y) ? (x) : (y)) + +/* Instruction numbering. + One instruction has three numbers, depending on the context. + Chunk = local within the RegAllocChunk. + Vec = index in the HInstrVec. + Total = total ordering. + + Consider the following HInstrSB structure: + insn1 + if (!cond) then fall-through { + insn2 + insn3 + } else out-of-line { + insn4 + } + insn5 + insn6 + + The resulting numbering is as follows: + chunk vec total + insn1 0 0 0 + insn2 0 0 1 + insn3 1 1 2 + insn4 0 0 1 + insn5 0 1 3 + insn6 1 2 4 +*/ /* Register allocator state is kept in an array of VRegState's. There is an element for every virtual register (vreg). Elements are indexed [0 .. n_vregs-1]. - Records information about vreg live range and its state. */ + Records information about vreg live range (in total ordering) and its state. + */ typedef struct { /* Live range, register class and spill offset are computed during the first register allocator pass and remain unchanged after that. */ /* This vreg becomes live with this instruction (inclusive). Contains - either an instruction number or INVALID_INSTRNO. 
*/ + either an instruction number in total ordering or INVALID_INSTRNO. */ Short live_after; /* This vreg becomes dead before this instruction (exclusive). Contains - either an instruction number or INVALID_INSTRNO. */ + either an instruction number in total ordering or INVALID_INSTRNO. */ Short dead_before; /* What kind of register this is. */ HRegClass reg_class; @@ -75,6 +106,20 @@ typedef } VRegState; +/* Records information on a real-register live range, associated with + a particular real register. Instruction numbers use chunk (local) numbering. + Computed once; does not change. */ +typedef + struct { + /* This rreg becomes live with this instruction (inclusive). Contains + either an instruction number in chunk numbering or INVALID_INSTRNO. */ + Short live_after; + /* This rreg becomes dead before this instruction (exclusive). Contains + either an instruction number in chunk numbering or INVALID_INSTRNO. */ + Short dead_before; + } + RRegLR; + /* The allocator also maintains a redundant array of indexes (rreg_state) from rreg numbers back to entries in vreg_state. It is redundant because iff rreg_state[r] == v then hregNumber(vreg_state[v].rreg) == r -- that is, the @@ -99,25 +144,13 @@ typedef } RRegState; -/* Records information on a real-register live range, associated with - a particular real register. Computed once; does not change. */ -typedef - struct { - /* This rreg becomes live with this instruction (inclusive). Contains - either an instruction number or INVALID_INSTRNO. */ - Short live_after; - /* This rreg becomes dead before this instruction (exclusive). Contains - either an instruction number or INVALID_INSTRNO. */ - Short dead_before; - } - RRegLR; - /* Live ranges for a single rreg and the current one. Live ranges are computed during the first register allocator pass and remain unchanged after that. 
The identity of the real register is not recorded here, because the index - of this structure in |rreg_lr_state| is the index number of the register, and - the register itself can be extracted from the RRegUniverse (univ). */ + of this structure in RegAllocChunk->rreg_lr_state is the index number of the + register, and the register itself can be extracted from the + RRegUniverse (univ). */ typedef struct { RRegLR* lrs; @@ -134,6 +167,123 @@ typedef #define IS_VALID_VREGNO(v) ((v) >= 0 && (v) < n_vregs) #define IS_VALID_RREGNO(r) ((r) >= 0 && (r) < n_rregs) +/* Represents register allocator state corresponding to one contiguous chunk + of instructions. The chunk either continues with If-Then-Else legs or + simply ends. */ +typedef + struct RegAllocChunk_ { + /* Live ranges of real registers. Computed during the first register + allocator pass and remain unchanged after that. Inherently local + to every chunk. */ + RRegLRState* rreg_lr_state; + UInt n_rregs; + + /* Incoming contiguous chunk of instructions starting at |ii_vec_start| + of size |ii_vec_len|. No HInstrIfThenElse is present here. */ + HInstrVec* instrs_in; + Short ii_vec_start; + UShort ii_vec_len; /* This is also ii_chunk_len. */ + Short ii_total_start; /* Start index for insns in total ordering. */ + /* Register usage for the current instr chunk of size |ii_vec_len|. */ + HRegUsage* reg_usage; + HInstrVec* instrs_out; + + /* Are If-Then-Legs present? */ + Bool isIfThenElse; + struct { + HCondCode ccOOL; /* Condition code for the OOL branch. */ + struct RegAllocChunk_* fallThrough; + struct RegAllocChunk_* outOfLine; + HPhiNode* phi_nodes; + UInt n_phis; + } IfThenElse; + struct RegAllocChunk_* next; /* Next chunk, if any. 
*/ + + /* Possible combinations (x = allowed, - = not allowed): + If-Then-Else legs: | present | not present + ---------------------------------------------------------------- + next chunk: not NULL | x | - + NULL | x | x + */ + } + RegAllocChunk; + +static void init_rreg_lr_state(RRegLRState* rreg_lrs) +{ + rreg_lrs->lrs_size = 4; + rreg_lrs->lrs = LibVEX_Alloc_inline(rreg_lrs->lrs_size + * sizeof(RRegLR)); + rreg_lrs->lrs_used = 0; + rreg_lrs->lr_current = &rreg_lrs->lrs[0]; + rreg_lrs->lr_current_idx = 0; +} + +static RegAllocChunk* new_chunk(HInstrVec* instrs_in, UInt n_rregs) +{ + RegAllocChunk* chunk = LibVEX_Alloc_inline(sizeof(RegAllocChunk)); + chunk->n_rregs = n_rregs; + chunk->rreg_lr_state = LibVEX_Alloc_inline(chunk->n_rregs + * sizeof(RRegLRState)); + for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + init_rreg_lr_state(&chunk->rreg_lr_state[r_idx]); + } + chunk->instrs_in = instrs_in; + chunk->ii_vec_start = INVALID_INSTRNO; + chunk->ii_vec_len = 0; + chunk->ii_total_start = INVALID_INSTRNO; + chunk->reg_usage = NULL; + chunk->instrs_out = NULL; + chunk->isIfThenElse = False; + chunk->next = NULL; + + return chunk; +} + +static void print_depth(UInt depth) +{ + for (UInt i = 0; i < depth; i++) { + vex_printf(" "); + } +} + +#define WALK_CHUNKS(process_one_chunk, process_fall_through_leg, \ + process_out_of_line_leg, process_phi_nodes) \ + do { \ + while (chunk != NULL) { \ + process_one_chunk; \ + if (chunk->isIfThenElse) { \ + if (DEBUG_REGALLOC) { \ + print_depth(depth); \ + vex_printf("if (!"); \ + con->ppCondCode(chunk->IfThenElse.ccOOL); \ + vex_printf(") then fall-through {\n"); \ + } \ + process_fall_through_leg; \ + if (DEBUG_REGALLOC) { \ + print_depth(depth); \ + vex_printf("} else out-of-line {\n"); \ + } \ + process_out_of_line_leg; \ + if (DEBUG_REGALLOC) { \ + print_depth(depth); \ + vex_printf("}\n"); \ + } \ + if (chunk->IfThenElse.n_phis > 0) { \ + process_phi_nodes; \ + if (DEBUG_REGALLOC) { \ + for (UInt p = 0; p < 
chunk->IfThenElse.n_phis; p++) { \ + print_depth(depth); \ + ppHPhiNode(&chunk->IfThenElse.phi_nodes[p]); \ + vex_printf("\n"); \ + } \ + } \ + } \ + } \ + chunk = chunk->next; \ + } \ + } while (0) + + /* Compute the index of the highest and lowest 1 in a ULong, respectively. Results are undefined if the argument is zero. Don't pass it zero :) */ static inline UInt ULong__maxIndex ( ULong w64 ) { @@ -157,13 +307,13 @@ static inline void enlarge_rreg_lrs(RRegLRState* rreg_lrs) rreg_lrs->lrs_size = 2 * rreg_lrs->lrs_used; } -static inline void print_state( - const RegAllocControl* con, - const VRegState* vreg_state, UInt n_vregs, - const RRegState* rreg_state, UInt n_rregs, - const RRegLRState* rreg_lr_state, - UShort current_ii) +static inline void print_state(const RegAllocChunk* chunk, + const VRegState* vreg_state, UInt n_vregs, const RRegState* rreg_state, + Short ii_total_current, const RegAllocControl* con) { + vex_printf("Register Allocator state (current instruction total #%d)\n", + ii_total_current); + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { const VRegState* vreg = &vreg_state[v_idx]; @@ -192,18 +342,19 @@ static inline void print_state( vex_printf(" "); } - if (vreg->live_after > (Short) current_ii) { - vex_printf("[not live yet]\n"); - } else if ((Short) current_ii >= vreg->dead_before) { - vex_printf("[now dead]\n"); + if (vreg->live_after > ii_total_current) { + vex_printf("[not live yet]"); + } else if (ii_total_current >= vreg->dead_before) { + vex_printf("[now dead]"); } else { - vex_printf("[live]\n"); + vex_printf("[live]"); } + vex_printf(" [%d - %d)\n", vreg->live_after, vreg->dead_before); } - for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { - const RRegState* rreg = &rreg_state[r_idx]; - const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + for (UInt r_idx = 0; r_idx < chunk->n_rregs; r_idx++) { + const RRegState* rreg = &rreg_state[r_idx]; + const RRegLR* lr = chunk->rreg_lr_state[r_idx].lr_current; vex_printf("rreg_state[%2u] = ", 
r_idx); UInt written = con->ppReg(con->univ->regs[r_idx]); for (Int w = 10 - written; w > 0; w--) { @@ -221,14 +372,13 @@ static inline void print_state( break; case Reserved: vex_printf("reserved - live range [%d, %d)\n", - rreg_lrs->lr_current->live_after, - rreg_lrs->lr_current->dead_before); + lr->live_after, lr->dead_before); break; } } } -static inline void emit_instr(HInstr* instr, HInstrVec* instrs_out, +static inline void emit_instr(RegAllocChunk* chunk, HInstr* instr, const RegAllocControl* con, const HChar* why) { if (DEBUG_REGALLOC) { @@ -240,17 +390,20 @@ static inline void emit_instr(HInstr* instr, HInstrVec* instrs_out, vex_printf("\n\n"); } - addHInstr(instrs_out, instr); + addHInstr(chunk->instrs_out, instr); } /* Spills a vreg assigned to some rreg. The vreg is spilled and the rreg is freed. Returns rreg's index. */ static inline UInt spill_vreg( - HReg vreg, UInt v_idx, UInt current_ii, VRegState* vreg_state, UInt n_vregs, - RRegState* rreg_state, UInt n_rregs, HInstrVec* instrs_out, + RegAllocChunk* chunk, + VRegState* vreg_state, UInt n_vregs, RRegState* rreg_state, + HReg vreg, UInt v_idx, Short ii_total_current, const RegAllocControl* con) { + UInt n_rregs = chunk->n_rregs; + /* Check some invariants first. */ vassert(IS_VALID_VREGNO((v_idx))); vassert(vreg_state[v_idx].disp == Assigned); @@ -258,7 +411,7 @@ static inline UInt spill_vreg( UInt r_idx = hregIndex(rreg); vassert(IS_VALID_RREGNO(r_idx)); vassert(hregClass(con->univ->regs[r_idx]) == hregClass(vreg)); - vassert(vreg_state[v_idx].dead_before > (Short) current_ii); + vassert(vreg_state[v_idx].dead_before > ii_total_current); vassert(vreg_state[v_idx].reg_class != HRcINVALID); /* Generate spill. 
*/ @@ -268,10 +421,10 @@ static inline UInt spill_vreg( con->mode64); vassert(spill1 != NULL || spill2 != NULL); /* cannot be both NULL */ if (spill1 != NULL) { - emit_instr(spill1, instrs_out, con, "spill1"); + emit_instr(chunk, spill1, con, "spill1"); } if (spill2 != NULL) { - emit_instr(spill2, instrs_out, con, "spill2"); + emit_instr(chunk, spill2, con, "spill2"); } /* Update register allocator state. */ @@ -287,42 +440,47 @@ static inline UInt spill_vreg( The vreg must not be from the instruction being processed, that is, it must not be listed in reg_usage->vRegs. */ static inline HReg find_vreg_to_spill( - VRegState* vreg_state, UInt n_vregs, - RRegState* rreg_state, UInt n_rregs, + const RegAllocChunk* chunk, + const VRegState* vreg_state, UInt n_vregs, const RRegState* rreg_state, const HRegUsage* instr_regusage, HRegClass target_hregclass, - const HRegUsage* reg_usage, UInt scan_forward_from, UInt scan_forward_max, - const RegAllocControl* con) + Short ii_chunk_current, const RegAllocControl* con) { + Short scan_forward_start = ii_chunk_current + 1; + Short scan_forward_max = chunk->ii_vec_len - 1; + /* Scan forwards a few instructions to find the most distant mentioned use of a vreg. We can scan in the range of (inclusive): - - reg_usage[scan_forward_from] + - reg_usage[scan_forward_start] - reg_usage[scan_forward_end], where scan_forward_end - = MIN(scan_forward_max, scan_forward_from + FEW_INSTRUCTIONS). */ + = MIN(scan_forward_max, scan_forward_start + FEW_INSTRUCTIONS). + reg_usage uses chunk instruction numbering. */ # define FEW_INSTRUCTIONS 5 - UInt scan_forward_end - = (scan_forward_max <= scan_forward_from + FEW_INSTRUCTIONS) ? - scan_forward_max : scan_forward_from + FEW_INSTRUCTIONS; + Short scan_forward_end + = (scan_forward_max <= scan_forward_start + FEW_INSTRUCTIONS) ? 
+ scan_forward_max : scan_forward_start + FEW_INSTRUCTIONS; # undef FEW_INSTRUCTIONS HReg vreg_found = INVALID_HREG; - UInt distance_so_far = 0; + Short distance_so_far = 0; for (UInt r_idx = con->univ->allocable_start[target_hregclass]; r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) { - if (rreg_state[r_idx].disp == Bound) { - HReg vreg = rreg_state[r_idx].vreg; + + const RRegState* rreg = &rreg_state[r_idx]; + if (rreg->disp == Bound) { + HReg vreg = rreg->vreg; if (! HRegUsage__contains(instr_regusage, vreg)) { - UInt ii = scan_forward_from; - for ( ; ii <= scan_forward_end; ii++) { - if (HRegUsage__contains(®_usage[ii], vreg)) { + Short ii_chunk = scan_forward_start; + for ( ; ii_chunk <= scan_forward_end; ii_chunk++) { + if (HRegUsage__contains(&chunk->reg_usage[ii_chunk], vreg)) { break; } } - if (ii - scan_forward_from > distance_so_far) { - distance_so_far = ii = scan_forward_from; + if (ii_chunk - scan_forward_start > distance_so_far) { + distance_so_far = ii_chunk - scan_forward_start; vreg_found = vreg; - if (ii + distance_so_far == scan_forward_end) { + if (ii_chunk + distance_so_far == scan_forward_end) { break; /* We are at the end. Nothing could be better. */ } } @@ -331,10 +489,10 @@ static inline HReg find_vreg_to_spill( } if (hregIsInvalid(vreg_found)) { - vex_printf("doRegisterAllocation: cannot find a register in class: "); + vex_printf("registerAllocation: cannot find a register in class: "); ppHRegClass(target_hregclass); vex_printf("\n"); - vpanic("doRegisterAllocation: cannot find a register."); + vpanic("registerAllocation: cannot find a register."); } return vreg_found; @@ -346,19 +504,18 @@ static inline HReg find_vreg_to_spill( a callee-save register because it won't be used for parameter passing around helper function calls. 
*/ static Bool find_free_rreg( - VRegState* vreg_state, UInt n_vregs, - RRegState* rreg_state, UInt n_rregs, - const RRegLRState* rreg_lr_state, - UInt current_ii, HRegClass target_hregclass, + const RegAllocChunk* chunk, + const VRegState* vreg_state, UInt n_vregs, const RRegState* rreg_state, + Short ii_chunk_current, HRegClass target_hregclass, Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found) { Bool found = False; - UInt distance_so_far = 0; /* running max for |live_after - current_ii| */ + Short distance_so_far = 0; /* running max for |live_after - current_ii| */ for (UInt r_idx = con->univ->allocable_start[target_hregclass]; r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) { const RRegState* rreg = &rreg_state[r_idx]; - const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + const RRegLRState* rreg_lrs = &chunk->rreg_lr_state[r_idx]; if (rreg->disp == Free) { if (rreg_lrs->lrs_used == 0) { found = True; @@ -366,14 +523,14 @@ static Bool find_free_rreg( break; /* There could be nothing better, so break now. */ } else { const RRegLR* lr = rreg_lrs->lr_current; - if (lr->live_after > (Short) current_ii) { + if (lr->live_after > ii_chunk_current) { /* Not live, yet. */ - if ((lr->live_after - (Short) current_ii) > distance_so_far) { - distance_so_far = lr->live_after - (Short) current_ii; + if ((lr->live_after - ii_chunk_current) > distance_so_far) { + distance_so_far = lr->live_after - ii_chunk_current; found = True; *r_idx_found = r_idx; } - } else if ((Short) current_ii >= lr->dead_before) { + } else if (ii_chunk_current >= lr->dead_before) { /* Now dead. Effectively as if there is no LR now. */ found = True; *r_idx_found = r_idx; @@ -391,172 +548,140 @@ static Bool find_free_rreg( return found; } -/* A target-independent register allocator (v3). Requires various functions - which it uses to deal abstractly with instructions and registers, since it - cannot have any target-specific knowledge. 
- Returns a new list of instructions, which, as a result of the behaviour of - mapRegs, will be in-place modifications of the original instructions. +/* --- Stage 1. --- + Determine total ordering of instructions and structure of HInstrIfThenElse. + Build similar structure of RegAllocChunk's. */ +static UInt stage1(HInstrVec* instrs_in, UInt ii_total_start, UInt n_rregs, + RegAllocChunk** first_chunk, const RegAllocControl* con) +{ + vassert(instrs_in->insns_used > 0); - Requires that the incoming code has been generated using vreg numbers - 0, 1 .. n_vregs-1. Appearance of a vreg outside that range is a checked - run-time error. + Short ii_vec_start = 0; - Takes unallocated instructions and returns allocated instructions. -*/ -HInstrSB* doRegisterAllocation( - /* Incoming virtual-registerised code. */ - HInstrSB* sb_in, + RegAllocChunk* chunk = new_chunk(instrs_in, n_rregs); + chunk->ii_vec_start = ii_vec_start; + chunk->ii_total_start = ii_total_start; + chunk->instrs_out = newHInstrVec(); + *first_chunk = chunk; - /* Register allocator controls to use. */ - const RegAllocControl* con -) -{ - vassert((con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN) == 0); + /* Now split incoming HInstrVec into chunks separated by HInstrIfThenElse. */ + for (Short ii_vec = 0; ii_vec < instrs_in->insns_used; ii_vec++) { + HInstr* instr = instrs_in->insns[ii_vec]; - /* TODO-JIT: for now, work only with the first HInstrVec. */ - HInstrVec* instrs_in = sb_in->insns; + HInstrIfThenElse* hite = con->isIfThenElse(instr); + if (LIKELY((hite == NULL) && (ii_vec < instrs_in->insns_used - 1))) { + continue; + } - /* The main register allocator state. */ - UInt n_vregs = sb_in->n_vregs; - VRegState* vreg_state = NULL; - if (n_vregs > 0) { - vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(VRegState)); - } + /* A chunk before HInstrIfThenElse or the last chunk of HInstrVec. */ + if (hite != NULL) { + /* Omit HInstrIfThenElse. 
*/ + chunk->ii_vec_len = ii_vec - ii_vec_start; + ii_vec++; + } else { + chunk->ii_vec_len = (ii_vec - ii_vec_start) + 1; + } + ii_total_start += chunk->ii_vec_len; + ii_vec_start = ii_vec; + + if (hite != NULL) { + RegAllocChunk* chunk_fallThrough; + UInt ii_total_fallThrough = stage1(hite->fallThrough, ii_total_start, + n_rregs, &chunk_fallThrough, con); + RegAllocChunk* chunk_outOfLine; + UInt ii_total_outOfLine = stage1(hite->outOfLine, ii_total_start, + n_rregs, &chunk_outOfLine, con); + + chunk->isIfThenElse = True; + chunk->IfThenElse.ccOOL = hite->ccOOL; + chunk->IfThenElse.fallThrough = chunk_fallThrough; + chunk->IfThenElse.outOfLine = chunk_outOfLine; + chunk->IfThenElse.phi_nodes = hite->phi_nodes; + chunk->IfThenElse.n_phis = hite->n_phis; + + ii_total_start = MAX(ii_total_fallThrough, ii_total_outOfLine); + } - /* If this is not so, the universe we have is nonsensical. */ - UInt n_rregs = con->univ->allocable; - vassert(n_rregs > 0); - STATIC_ASSERT(N_RREGUNIVERSE_REGS == 64); + if (ii_vec < instrs_in->insns_used - 1) { + RegAllocChunk* previous = chunk; + chunk = new_chunk(instrs_in, n_rregs); + chunk->ii_vec_start = ii_vec_start; + chunk->ii_total_start = toShort(ii_total_start); + chunk->instrs_out = (*first_chunk)->instrs_out; + previous->next = chunk; + } + } - /* Redundant rreg -> vreg state. */ - RRegState* rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState)); + return ii_total_start; +} - /* Info on rreg live ranges. */ - RRegLRState* rreg_lr_state - = LibVEX_Alloc_inline(n_rregs * sizeof(RRegLRState)); - /* Info on register usage in the incoming instruction. Computed once +/* --- Stage 2. --- + Scan the incoming instructions. + Note: vreg state is initially global (shared accross all chunks). + rreg state is inherently local to every chunk. 
*/ +static void stage2_chunk(RegAllocChunk* chunk, VRegState* vreg_state, + UInt n_vregs, UInt n_rregs, UInt depth, const RegAllocControl* con) +{ + /* Info on register usage in the incoming instructions. Computed once and remains unchanged, more or less; updated sometimes by the direct-reload optimisation. */ - HRegUsage* reg_usage - = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->insns_used); - - /* The live range numbers are signed shorts, and so limiting the - number of instructions to 15000 comfortably guards against them - overflowing 32k. */ - vassert(instrs_in->insns_used <= 15000); - - /* The output SB of instructions. */ - HInstrSB* sb_out = newHInstrSB(); - sb_out->n_vregs = n_vregs; - HInstrVec* instrs_out = sb_out->insns; - - -# define OFFENDING_VREG(_v_idx, _instr, _mode) \ - do { \ - vex_printf("\n\nOffending vreg = %u\n", (_v_idx)); \ - vex_printf("\nOffending instruction = "); \ - con->ppInstr((_instr), con->mode64); \ - vex_printf("\n"); \ - vpanic("doRegisterAllocation: first event for vreg is "#_mode \ - " (should be Write)"); \ + chunk->reg_usage = LibVEX_Alloc_inline(sizeof(HRegUsage) * chunk->ii_vec_len); + +# define OFFENDING_VREG(_v_idx, _instr, _mode) \ + do { \ + vex_printf("\n\nOffending vreg = %u\n", (_v_idx)); \ + vex_printf("\nOffending instruction = "); \ + con->ppInstr((_instr), con->mode64); \ + vex_printf("\n"); \ + vpanic("registerAllocation: first event for vreg is "#_mode \ + " (should be Write)"); \ } while (0) -# define OFFENDING_RREG(_r_idx, _instr, _mode) \ - do { \ - vex_printf("\n\nOffending rreg = "); \ - con->ppReg(con->univ->regs[(_r_idx)]); \ - vex_printf("\nOffending instruction = "); \ - con->ppInstr((_instr), con->mode64); \ - vex_printf("\n"); \ - vpanic("doRegisterAllocation: first event for rreg is "#_mode \ - " (should be Write)"); \ +# define OFFENDING_RREG(_r_idx, _instr, _mode) \ + do { \ + vex_printf("\n\nOffending rreg = "); \ + con->ppReg(con->univ->regs[(_r_idx)]); \ + vex_printf("\nOffending 
instruction = "); \ + con->ppInstr((_instr), con->mode64); \ + vex_printf("\n"); \ + vpanic("registerAllocation: first event for rreg is "#_mode" \ + (should be Write)"); \ } while (0) + Short ii_chunk = 0; + for (Short ii_vec = chunk->ii_vec_start; + ii_vec < chunk->ii_vec_start + chunk->ii_vec_len; + ii_vec++, ii_chunk++) { + const HInstr* instr = chunk->instrs_in->insns[ii_vec]; -/* Finds an rreg of the correct class. - If a free rreg is not found, then spills a vreg not used by the current - instruction and makes free the corresponding rreg. */ -# define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase) \ - ({ \ - UInt _r_free_idx = -1; \ - Bool free_rreg_found = find_free_rreg( \ - vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state, \ - (_ii), (_reg_class), (_reserve_phase), con, &_r_free_idx); \ - if (!free_rreg_found) { \ - HReg vreg_to_spill = find_vreg_to_spill( \ - vreg_state, n_vregs, rreg_state, n_rregs, \ - ®_usage[(_ii)], (_reg_class), \ - reg_usage, (_ii) + 1, \ - instrs_in->insns_used - 1, con); \ - _r_free_idx = spill_vreg(vreg_to_spill, hregIndex(vreg_to_spill), \ - (_ii), vreg_state, n_vregs, \ - rreg_state, n_rregs, \ - instrs_out, con); \ - } \ - \ - vassert(IS_VALID_RREGNO(_r_free_idx)); \ - \ - _r_free_idx; \ - }) - - - /* --- Stage 0. Initialize the state. 
--- */ - for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { - vreg_state[v_idx].live_after = INVALID_INSTRNO; - vreg_state[v_idx].dead_before = INVALID_INSTRNO; - vreg_state[v_idx].reg_class = HRcINVALID; - vreg_state[v_idx].disp = Unallocated; - vreg_state[v_idx].rreg = INVALID_HREG; - vreg_state[v_idx].spill_offset = 0; - } - - for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { - rreg_state[r_idx].disp = Free; - rreg_state[r_idx].vreg = INVALID_HREG; - } - - for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { - RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; - rreg_lrs->lrs_size = 4; - rreg_lrs->lrs = LibVEX_Alloc_inline(rreg_lrs->lrs_size - * sizeof(RRegLR)); - rreg_lrs->lrs_used = 0; - rreg_lrs->lr_current = &rreg_lrs->lrs[0]; - rreg_lrs->lr_current_idx = 0; - } - - /* --- Stage 1. Scan the incoming instructions. --- */ - for (UShort ii = 0; ii < instrs_in->insns_used; ii++) { - const HInstr* instr = instrs_in->insns[ii]; - - HInstrIfThenElse* hite = con->isIfThenElse(instr); - if (UNLIKELY(hite != NULL)) { - vpanic("doRegisterAllocation: If-Then-Else unsupported"); - } - - con->getRegUsage(®_usage[ii], instr, con->mode64); + con->getRegUsage(&chunk->reg_usage[ii_chunk], instr, con->mode64); if (0) { - vex_printf("\n%u stage 1: ", ii); + vex_printf("\n"); + print_depth(depth); + vex_printf("stage 2: %d (chunk) %d (vec) %d (total) stage 2: ", + ii_chunk, ii_vec, INSTRNO_TOTAL); con->ppInstr(instr, con->mode64); vex_printf("\n"); - ppHRegUsage(con->univ, ®_usage[ii]); + print_depth(depth); + ppHRegUsage(con->univ, &chunk->reg_usage[ii_chunk]); } /* Process virtual registers mentioned in the instruction. 
*/ - for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) { - HReg vreg = reg_usage[ii].vRegs[j]; + for (UInt j = 0; j < chunk->reg_usage[ii_chunk].n_vRegs; j++) { + HReg vreg = chunk->reg_usage[ii_chunk].vRegs[j]; vassert(hregIsVirtual(vreg)); UInt v_idx = hregIndex(vreg); if (!IS_VALID_VREGNO(v_idx)) { vex_printf("\n"); + print_depth(depth); con->ppInstr(instr, con->mode64); vex_printf("\n"); vex_printf("vreg %u (n_vregs %u)\n", v_idx, n_vregs); - vpanic("doRegisterAllocation: out-of-range vreg"); + vpanic("registerAllocation (stage 2): out-of-range vreg"); } /* Note the register class. */ @@ -569,33 +694,36 @@ HInstrSB* doRegisterAllocation( } /* Consider live ranges. */ - switch (reg_usage[ii].vMode[j]) { + switch (chunk->reg_usage[ii_chunk].vMode[j]) { case HRmRead: if (vreg_state[v_idx].live_after == INVALID_INSTRNO) { OFFENDING_VREG(v_idx, instr, "Read"); } - vreg_state[v_idx].dead_before = toShort(ii + 1); break; case HRmWrite: if (vreg_state[v_idx].live_after == INVALID_INSTRNO) { - vreg_state[v_idx].live_after = toShort(ii); + vreg_state[v_idx].live_after = INSTRNO_TOTAL; + } else if (vreg_state[v_idx].live_after > INSTRNO_TOTAL) { + vreg_state[v_idx].live_after = INSTRNO_TOTAL; } - vreg_state[v_idx].dead_before = toShort(ii + 1); break; case HRmModify: if (vreg_state[v_idx].live_after == INVALID_INSTRNO) { OFFENDING_VREG(v_idx, instr, "Modify"); } - vreg_state[v_idx].dead_before = toShort(ii + 1); break; default: vassert(0); } + + if (vreg_state[v_idx].dead_before < INSTRNO_TOTAL + 1) { + vreg_state[v_idx].dead_before = INSTRNO_TOTAL + 1; + } } /* Process real registers mentioned in the instruction. 
*/ - const ULong rRead = reg_usage[ii].rRead; - const ULong rWritten = reg_usage[ii].rWritten; + const ULong rRead = chunk->reg_usage[ii_chunk].rRead; + const ULong rWritten = chunk->reg_usage[ii_chunk].rWritten; const ULong rMentioned = rRead | rWritten; if (rMentioned != 0) { @@ -617,7 +745,7 @@ HInstrSB* doRegisterAllocation( continue; } - RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + RRegLRState* rreg_lrs = &chunk->rreg_lr_state[r_idx]; const Bool isR = (rRead & jMask) != 0; const Bool isW = (rWritten & jMask) != 0; @@ -626,8 +754,8 @@ HInstrSB* doRegisterAllocation( enlarge_rreg_lrs(rreg_lrs); } - rreg_lrs->lrs[rreg_lrs->lrs_used].live_after = toShort(ii); - rreg_lrs->lrs[rreg_lrs->lrs_used].dead_before = toShort(ii + 1); + rreg_lrs->lrs[rreg_lrs->lrs_used].live_after = ii_chunk; + rreg_lrs->lrs[rreg_lrs->lrs_used].dead_before = ii_chunk + 1; rreg_lrs->lrs_used += 1; } else if (!isW && isR) { if ((rreg_lrs->lrs_used == 0) @@ -635,8 +763,7 @@ HInstrSB* doRegisterAllocation( == INVALID_INSTRNO)) { OFFENDING_RREG(r_idx, instr, "Read"); } - rreg_lrs->lrs[rreg_lrs->lrs_used - 1].dead_before - = toShort(ii + 1); + rreg_lrs->lrs[rreg_lrs->lrs_used - 1].dead_before = ii_chunk + 1; } else { vassert(isR && isW); if ((rreg_lrs->lrs_used == 0) @@ -644,21 +771,73 @@ HInstrSB* doRegisterAllocation( == INVALID_INSTRNO)) { OFFENDING_RREG(r_idx, instr, "Modify"); } - rreg_lrs->lrs[rreg_lrs->lrs_used - 1].dead_before - = toShort(ii + 1); + rreg_lrs->lrs[rreg_lrs->lrs_used - 1].dead_before = ii_chunk + 1; } } } } +} - if (DEBUG_REGALLOC) { - for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { - vex_printf("vreg %3u: [%3d, %3d)\n", - v_idx, vreg_state[v_idx].live_after, - vreg_state[v_idx].dead_before); +static void stage2_phi_nodes(RegAllocChunk* chunk, VRegState* vreg_state, + UInt n_vregs, UInt depth, const RegAllocControl* con) +{ + vassert(chunk->next != NULL); + Short ii_total_next = chunk->next->ii_total_start; + + for (UInt p = 0; p < chunk->IfThenElse.n_phis; p++) { + 
const HPhiNode* phi = &chunk->IfThenElse.phi_nodes[p]; + + /* Extend dead-before of source vregs up to the first instruction + after join from If-Then-Else. */ + UInt v_idx_fallThrough = hregIndex(phi->srcFallThrough); + vassert(vreg_state[v_idx_fallThrough].live_after != INVALID_INSTRNO); + if (vreg_state[v_idx_fallThrough].dead_before < ii_total_next + 1) { + vreg_state[v_idx_fallThrough].dead_before = ii_total_next + 1; } - for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + UInt v_idx_outOfLine = hregIndex(phi->srcOutOfLine); + vassert(vreg_state[v_idx_outOfLine].live_after != INVALID_INSTRNO); + if (vreg_state[v_idx_outOfLine].dead_before < ii_total_next + 1) { + vreg_state[v_idx_outOfLine].dead_before = ii_total_next + 1; + } + + /* Live range for destination vreg begins here. */ + UInt v_idx_dst = hregIndex(phi->dst); + vassert(vreg_state[v_idx_dst].live_after == INVALID_INSTRNO); + vreg_state[v_idx_dst].live_after = ii_total_next; + vreg_state[v_idx_dst].dead_before = ii_total_next + 1; + } +} + +static void stage2(RegAllocChunk* chunk, VRegState* vreg_state, UInt n_vregs, + UInt n_rregs, UInt depth, const RegAllocControl* con) +{ + WALK_CHUNKS(stage2_chunk(chunk, vreg_state, n_vregs, n_rregs, depth, con), + stage2(chunk->IfThenElse.fallThrough, vreg_state, n_vregs, + n_rregs, depth + 1, con), + stage2(chunk->IfThenElse.outOfLine, vreg_state, n_vregs, + n_rregs, depth + 1, con), + stage2_phi_nodes(chunk, vreg_state, n_vregs, depth, con)); +} + +static void stage2_debug_vregs(const VRegState* vreg_state, UInt n_vregs) +{ + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { + vex_printf("vreg %3u: [%3d, %3d)\n", + v_idx, vreg_state[v_idx].live_after, + vreg_state[v_idx].dead_before); + } +} + +static void stage2_debug_rregs_chunk(RegAllocChunk* chunk, UInt depth, + const RegAllocControl* con) +{ + Bool any_lrs = False; + for (UInt r_idx = 0; r_idx < chunk->n_rregs; r_idx++) { + const RRegLRState* rreg_lrs = &chunk->rreg_lr_state[r_idx]; + if (rreg_lrs->lrs_used 
> 0) { + any_lrs = True; + print_depth(depth); vex_printf("rreg %2u (", r_idx); UInt written = con->ppReg(con->univ->regs[r_idx]); vex_printf("):"); @@ -666,7 +845,6 @@ HInstrSB* doRegisterAllocation( vex_printf(" "); } - const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; for (UInt l = 0; l < rreg_lrs->lrs_used; l++) { vex_printf("[%3d, %3d) ", rreg_lrs->lrs[l].live_after, rreg_lrs->lrs[l].dead_before); @@ -675,7 +853,36 @@ HInstrSB* doRegisterAllocation( } } - /* --- Stage 2. Allocate spill slots. --- */ + if (!any_lrs) { + print_depth(depth); + vex_printf("[no rreg live ranges for this chunk]\n"); + } +} + +static void stage2_debug_rregs(RegAllocChunk* chunk, UInt depth, + const RegAllocControl* con) +{ + WALK_CHUNKS(stage2_debug_rregs_chunk(chunk, depth, con), + stage2_debug_rregs(chunk->IfThenElse.fallThrough, depth + 1, con), + stage2_debug_rregs(chunk->IfThenElse.outOfLine, depth + 1, con), + ;); +} + +/* Allocates spill slots. Because VRegState is initiall global, also spill slots + are initially global. This might have an adverse effect that spill slots will + eventuall run out if there are too many nested If-Then-Else legs. In that + case, VRegState must not be initially global but rather local to every leg; + and vregs will need to eventually have extended their live ranges after legs + merge. */ +static void stage3(VRegState* vreg_state, UInt n_vregs, + const RegAllocControl* con) +{ +# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) + STATIC_ASSERT((N_SPILL64S % 2) == 0); + STATIC_ASSERT((LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN) == 0); + + Short ss_busy_until_before[N_SPILL64S]; + vex_bzero(&ss_busy_until_before, sizeof(ss_busy_until_before)); /* Each spill slot is 8 bytes long. 
For vregs which take more than 64 bits to spill (for example classes Flt64 and Vec128), we have to allocate two @@ -700,13 +907,6 @@ HInstrSB* doRegisterAllocation( values as possible in spill slots, but nevertheless need to have a spill slot available for all vregs, just in case. */ -# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) - STATIC_ASSERT((N_SPILL64S % 2) == 0); - STATIC_ASSERT((LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN) == 0); - - Short ss_busy_until_before[N_SPILL64S]; - vex_bzero(&ss_busy_until_before, sizeof(ss_busy_until_before)); - for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { /* True iff this vreg is unused. In which case we also expect that the reg_class field for it has not been set. */ @@ -728,10 +928,11 @@ HInstrSB* doRegisterAllocation( /* Find two adjacent free slots which provide up to 128 bits to spill the vreg. Since we are trying to find an even:odd pair, move along in steps of 2 (slots). */ - for (ss_no = 0; ss_no < N_SPILL64S - 1; ss_no += 2) + for (ss_no = 0; ss_no < N_SPILL64S - 1; ss_no += 2) { if (ss_busy_until_before[ss_no + 0] <= vreg_state[v_idx].live_after && ss_busy_until_before[ss_no + 1] <= vreg_state[v_idx].live_after) break; + } if (ss_no >= N_SPILL64S - 1) { vpanic("N_SPILL64S is too low in VEX. Increase and recompile."); } @@ -777,30 +978,72 @@ HInstrSB* doRegisterAllocation( v_idx, vreg_state[v_idx].spill_offset); } +# undef N_SPILL64S +} + - /* --- State 3. Process instructions. --- */ - for (UShort ii = 0; ii < instrs_in->insns_used; ii++) { - HInstr* instr = instrs_in->insns[ii]; +static void stage4_chunk(RegAllocChunk* chunk, + VRegState* vreg_state, UInt n_vregs, RRegState* rreg_state, + UInt depth, const RegAllocControl* con) +{ + UInt n_rregs = chunk->n_rregs; + +/* Finds an rreg of the correct class. + If a free rreg is not found, then spills a vreg not used by the current + instruction and makes free the corresponding rreg. 
*/ +# define FIND_OR_MAKE_FREE_RREG(_v_idx, _reg_class, _reserve_phase) \ + ({ \ + UInt _r_free_idx = -1; \ + Bool free_rreg_found = find_free_rreg(chunk, \ + vreg_state, n_vregs, rreg_state, \ + ii_chunk, (_reg_class), (_reserve_phase), \ + con, &_r_free_idx); \ + if (!free_rreg_found) { \ + HReg vreg_to_spill = find_vreg_to_spill(chunk, \ + vreg_state, n_vregs, rreg_state, \ + &chunk->reg_usage[ii_chunk], (_reg_class), \ + ii_chunk, con); \ + _r_free_idx = spill_vreg(chunk, vreg_state, n_vregs, rreg_state, \ + vreg_to_spill, hregIndex(vreg_to_spill), \ + INSTRNO_TOTAL, con); \ + } \ + \ + vassert(IS_VALID_RREGNO(_r_free_idx)); \ + \ + _r_free_idx; \ + }) + + Short ii_chunk = 0; + for (Short ii_vec = chunk->ii_vec_start; + ii_vec < chunk->ii_vec_start + chunk->ii_vec_len; + ii_vec++, ii_chunk++) { + + HInstr* instr = chunk->instrs_in->insns[ii_vec]; + HRegUsage* reg_usage = &chunk->reg_usage[ii_chunk]; if (DEBUG_REGALLOC) { - vex_printf("\n====----====---- Instr %d ----====----====\n", ii); + print_depth(depth); + vex_printf("\n====---- Instr: chunk %d, vec %d, total %d ----====\n", + ii_chunk, ii_vec, INSTRNO_TOTAL); + print_depth(depth); vex_printf("---- "); - con->ppInstr(instrs_in->insns[ii], con->mode64); + con->ppInstr(chunk->instrs_in->insns[ii_vec], con->mode64); + print_depth(depth); vex_printf("\n\nInitial state:\n"); - print_state(con, vreg_state, n_vregs, rreg_state, n_rregs, - rreg_lr_state, ii); + print_state(chunk, vreg_state, n_vregs, rreg_state, INSTRNO_TOTAL, con); vex_printf("\n"); } /* ------------ Sanity checks ------------ */ /* Sanity checks are relatively expensive. So they are done only once - every 17 instructions, and just before the last instruction. */ + every 17 instructions, and just before the last instruction in every + HInstrVec. 
*/ Bool do_sanity_check = toBool( SANITY_CHECKS_EVERY_INSTR - || ii == instrs_in->insns_used - 1 - || (ii > 0 && (ii % 17) == 0) + || ii_vec == chunk->ii_vec_len - 1 + || (ii_chunk > 0 && (ii_chunk % 17) == 0) ); if (do_sanity_check) { @@ -823,10 +1066,11 @@ HInstrSB* doRegisterAllocation( } for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { - if (rreg_state[r_idx].disp == Bound) { - vassert(hregIsVirtual(rreg_state[r_idx].vreg)); + const RRegState* rreg = &rreg_state[r_idx]; + if (rreg->disp == Bound) { + vassert(hregIsVirtual(rreg->vreg)); - UInt v_idx = hregIndex(rreg_state[r_idx].vreg); + UInt v_idx = hregIndex(rreg->vreg); vassert(IS_VALID_VREGNO(v_idx)); vassert(vreg_state[v_idx].disp == Assigned); vassert(hregIndex(vreg_state[v_idx].rreg) == r_idx); @@ -837,11 +1081,11 @@ HInstrSB* doRegisterAllocation( a corresponding hard live range for it. */ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { if (rreg_state[r_idx].disp == Reserved) { - const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + const RRegLRState* rreg_lrs = &chunk->rreg_lr_state[r_idx]; vassert(rreg_lrs->lrs_used > 0); vassert(rreg_lrs->lr_current_idx < rreg_lrs->lrs_used); - vassert(rreg_lrs->lr_current->live_after <= (Short) ii); - vassert((Short) ii < rreg_lrs->lr_current->dead_before); + vassert(rreg_lrs->lr_current->live_after <= ii_chunk); + vassert(ii_chunk < rreg_lrs->lr_current->dead_before); } } } @@ -865,8 +1109,8 @@ HInstrSB* doRegisterAllocation( vassert(IS_VALID_VREGNO(vs_idx)); vassert(IS_VALID_VREGNO(vd_idx)); - if ((vreg_state[vs_idx].dead_before == ii + 1) - && (vreg_state[vd_idx].live_after == ii) + if ((vreg_state[vs_idx].dead_before == INSTRNO_TOTAL + 1) + && (vreg_state[vd_idx].live_after == INSTRNO_TOTAL) && (vreg_state[vs_idx].disp == Assigned)) { /* Live ranges are adjacent and source vreg is bound. 
@@ -882,6 +1126,7 @@ HInstrSB* doRegisterAllocation( rreg_state[r_idx].vreg = vregD; if (DEBUG_REGALLOC) { + print_depth(depth); vex_printf("coalesced: "); con->ppReg(vregS); vex_printf(" -> "); @@ -892,9 +1137,9 @@ HInstrSB* doRegisterAllocation( /* In rare cases it can happen that vregD's live range ends here. Check and eventually free the vreg and rreg. This effectively means that either the translated program - contained dead code (but VEX iropt passes are pretty good + contained dead code (although VEX iropt passes are pretty good at eliminating it) or the VEX backend generated dead code. */ - if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) { + if (vreg_state[vd_idx].dead_before <= INSTRNO_TOTAL + 1) { vreg_state[vd_idx].disp = Unallocated; vreg_state[vd_idx].rreg = INVALID_HREG; rreg_state[r_idx].disp = Free; @@ -918,8 +1163,8 @@ HInstrSB* doRegisterAllocation( 2b. Move the corresponding vreg to a free rreg. This is better than spilling it and immediatelly reloading it. */ - const ULong rRead = reg_usage[ii].rRead; - const ULong rWritten = reg_usage[ii].rWritten; + const ULong rRead = reg_usage->rRead; + const ULong rWritten = reg_usage->rWritten; const ULong rMentioned = rRead | rWritten; if (rMentioned != 0) { @@ -937,7 +1182,7 @@ HInstrSB* doRegisterAllocation( } RRegState* rreg = &rreg_state[r_idx]; - const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + const RRegLRState* rreg_lrs = &chunk->rreg_lr_state[r_idx]; if (LIKELY(rreg_lrs->lrs_used == 0)) { continue; } @@ -945,8 +1190,8 @@ HInstrSB* doRegisterAllocation( continue; } - if ((rreg_lrs->lr_current->live_after <= (Short) ii) - && ((Short) ii < rreg_lrs->lr_current->dead_before)) { + if ((rreg_lrs->lr_current->live_after <= ii_chunk) + && (ii_chunk < rreg_lrs->lr_current->dead_before)) { if (rreg->disp == Bound) { /* Yes, there is an associated vreg. We need to deal with @@ -954,20 +1199,20 @@ HInstrSB* doRegisterAllocation( HReg vreg = rreg->vreg; UInt v_idx = hregIndex(vreg); - if (! 
HRegUsage__contains(®_usage[ii], vreg)) { + if (! HRegUsage__contains(reg_usage, vreg)) { /* Spill the vreg. It is not used by this instruction. */ - spill_vreg(vreg, v_idx, ii, vreg_state, n_vregs, - rreg_state, n_rregs, instrs_out, con); + spill_vreg(chunk, vreg_state, n_vregs, rreg_state, + vreg, v_idx, INSTRNO_TOTAL, con); } else { /* Find or make a free rreg where to move this vreg to. */ UInt r_free_idx = FIND_OR_MAKE_FREE_RREG( - ii, v_idx, vreg_state[v_idx].reg_class, True); + v_idx, vreg_state[v_idx].reg_class, True); /* Generate "move" between real registers. */ HInstr* move = con->genMove(con->univ->regs[r_idx], con->univ->regs[r_free_idx], con->mode64); vassert(move != NULL); - emit_instr(move, instrs_out, con, "move"); + emit_instr(chunk, move, con, "move"); /* Update the register allocator state. */ vassert(vreg_state[v_idx].disp == Assigned); @@ -983,6 +1228,7 @@ HInstrSB* doRegisterAllocation( rreg->disp = Reserved; if (DEBUG_REGALLOC) { + print_depth(depth); vex_printf("rreg has been reserved: "); con->ppReg(con->univ->regs[r_idx]); vex_printf("\n\n"); @@ -1001,24 +1247,24 @@ HInstrSB* doRegisterAllocation( instrs_in->insns[ii] with this new instruction, and recompute its reg_usage, so that the change is invisible to the standard-case handling that follows. */ - if ((con->directReload != NULL) && (reg_usage[ii].n_vRegs <= 2)) { + if ((con->directReload != NULL) && (reg_usage->n_vRegs <= 2)) { Bool debug_direct_reload = False; Bool nreads = 0; HReg vreg_found = INVALID_HREG; Short spill_offset = 0; - for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) { - HReg vreg = reg_usage[ii].vRegs[j]; + for (UInt j = 0; j < reg_usage->n_vRegs; j++) { + HReg vreg = reg_usage->vRegs[j]; vassert(hregIsVirtual(vreg)); - if (reg_usage[ii].vMode[j] == HRmRead) { + if (reg_usage->vMode[j] == HRmRead) { nreads++; UInt v_idx = hregIndex(vreg); vassert(IS_VALID_VREGNO(v_idx)); if (vreg_state[v_idx].disp == Spilled) { /* Is this its last use? 
*/ - vassert(vreg_state[v_idx].dead_before >= (Short) (ii + 1)); - if ((vreg_state[v_idx].dead_before == (Short) (ii + 1)) + vassert(vreg_state[v_idx].dead_before >= INSTRNO_TOTAL + 1); + if ((vreg_state[v_idx].dead_before == INSTRNO_TOTAL + 1) && hregIsInvalid(vreg_found)) { vreg_found = vreg; spill_offset = vreg_state[v_idx].spill_offset; @@ -1028,14 +1274,14 @@ HInstrSB* doRegisterAllocation( } if (!hregIsInvalid(vreg_found) && (nreads == 1)) { - if (reg_usage[ii].n_vRegs == 2) { - vassert(! sameHReg(reg_usage[ii].vRegs[0], - reg_usage[ii].vRegs[1])); + if (reg_usage->n_vRegs == 2) { + vassert(! sameHReg(reg_usage->vRegs[0], reg_usage->vRegs[1])); } - HInstr* reloaded = con->directReload(instrs_in->insns[ii], - vreg_found, spill_offset); + HInstr* reloaded = con->directReload( + chunk->instrs_in->insns[ii_vec], vreg_found, spill_offset); if (debug_direct_reload && (reloaded != NULL)) { + print_depth(depth); vex_printf("[%3d] ", spill_offset); ppHReg(vreg_found); vex_printf(": "); @@ -1045,8 +1291,8 @@ HInstrSB* doRegisterAllocation( /* Update info about the instruction, so it looks as if it had been in this form all along. */ instr = reloaded; - instrs_in->insns[ii] = reloaded; - con->getRegUsage(®_usage[ii], instr, con->mode64); + chunk->instrs_in->insns[ii_vec] = reloaded; + con->getRegUsage(reg_usage, instr, con->mode64); if (debug_direct_reload) { vex_printf(" --> "); con->ppInstr(reloaded, con->mode64); @@ -1072,12 +1318,15 @@ HInstrSB* doRegisterAllocation( - Spilled: Find a free rreg and reload vreg into it. Naturally, finding a free rreg may involve spilling a vreg not used by the instruction. 
*/ - for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) { - HReg vreg = reg_usage[ii].vRegs[j]; + for (UInt j = 0; j < reg_usage->n_vRegs; j++) { + HReg vreg = reg_usage->vRegs[j]; vassert(hregIsVirtual(vreg)); if (0) { - vex_printf("considering "); con->ppReg(vreg); vex_printf("\n"); + print_depth(depth); + vex_printf("considering "); + con->ppReg(vreg); + vex_printf("\n"); } UInt v_idx = hregIndex(vreg); @@ -1092,14 +1341,14 @@ HInstrSB* doRegisterAllocation( /* Find or make a free rreg of the correct class. */ UInt r_idx = FIND_OR_MAKE_FREE_RREG( - ii, v_idx, vreg_state[v_idx].reg_class, False); + v_idx, vreg_state[v_idx].reg_class, False); rreg = con->univ->regs[r_idx]; /* Generate reload only if the vreg is spilled and is about to being read or modified. If it is merely written than reloading it first would be pointless. */ if ((vreg_state[v_idx].disp == Spilled) - && (reg_usage[ii].vMode[j] != HRmWrite)) { + && (reg_usage->vMode[j] != HRmWrite)) { HInstr* reload1 = NULL; HInstr* reload2 = NULL; @@ -1107,10 +1356,10 @@ HInstrSB* doRegisterAllocation( vreg_state[v_idx].spill_offset, con->mode64); vassert(reload1 != NULL || reload2 != NULL); if (reload1 != NULL) { - emit_instr(reload1, instrs_out, con, "reload1"); + emit_instr(chunk, reload1, con, "reload1"); } if (reload2 != NULL) { - emit_instr(reload2, instrs_out, con, "reload2"); + emit_instr(chunk, reload2, con, "reload2"); } } @@ -1123,12 +1372,12 @@ HInstrSB* doRegisterAllocation( } con->mapRegs(&remap, instr, con->mode64); - emit_instr(instr, instrs_out, con, NULL); + emit_instr(chunk, instr, con, NULL); if (DEBUG_REGALLOC) { + print_depth(depth); vex_printf("After dealing with current instruction:\n"); - print_state(con, vreg_state, n_vregs, rreg_state, n_rregs, - rreg_lr_state, ii); + print_state(chunk, vreg_state, n_vregs, rreg_state, INSTRNO_TOTAL, con); vex_printf("\n"); } @@ -1137,15 +1386,15 @@ HInstrSB* doRegisterAllocation( - Have been reserved and whose hard live range ended. 
- Have been bound to vregs whose live range ended. */ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { - RRegState* rreg = &rreg_state[r_idx]; - RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + RRegState* rreg = &rreg_state[r_idx]; + RRegLRState* rreg_lrs = &chunk->rreg_lr_state[r_idx]; switch (rreg->disp) { case Free: break; case Reserved: if (rreg_lrs->lrs_used > 0) { /* Consider "dead before" the next instruction. */ - if (rreg_lrs->lr_current->dead_before <= (Short) ii + 1) { + if (rreg_lrs->lr_current->dead_before <= ii_chunk + 1) { rreg_state[r_idx].disp = Free; rreg_state[r_idx].vreg = INVALID_HREG; ... [truncated message content] |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:27:56
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=afd5c9223ab45b91d03781dbd401ab324b663ce7 commit afd5c9223ab45b91d03781dbd401ab324b663ce7 Author: Ivo Raisr <iv...@iv...> Date: Sun Aug 13 19:18:11 2017 +0200 Fix VEX register allocator (v3) to work with HInstrSB, HInstrVec. It does not support If-Then-Else, though. Diff: --- Makefile.vex.am | 1 - VEX/priv/host_generic_reg_alloc2.c | 1540 ------------------------------------ VEX/priv/host_generic_reg_alloc3.c | 98 +-- VEX/priv/host_generic_regs.h | 10 +- VEX/priv/main_main.c | 13 +- VEX/pub/libvex.h | 5 - coregrind/m_main.c | 2 - 7 files changed, 58 insertions(+), 1611 deletions(-) diff --git a/Makefile.vex.am b/Makefile.vex.am index e0c17e4..a1a1918 100644 --- a/Makefile.vex.am +++ b/Makefile.vex.am @@ -130,7 +130,6 @@ LIBVEX_SOURCES_COMMON = \ priv/host_generic_simd128.c \ priv/host_generic_simd256.c \ priv/host_generic_maddf.c \ - priv/host_generic_reg_alloc2.c \ priv/host_generic_reg_alloc3.c \ priv/host_x86_defs.c \ priv/host_x86_isel.c diff --git a/VEX/priv/host_generic_reg_alloc2.c b/VEX/priv/host_generic_reg_alloc2.c deleted file mode 100644 index ec291d3..0000000 --- a/VEX/priv/host_generic_reg_alloc2.c +++ /dev/null @@ -1,1540 +0,0 @@ - -/*---------------------------------------------------------------*/ -/*--- begin host_reg_alloc2.c ---*/ -/*---------------------------------------------------------------*/ - -/* - This file is part of Valgrind, a dynamic binary instrumentation - framework. - - Copyright (C) 2004-2017 OpenWorks LLP - in...@op... - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - - The GNU General Public License is contained in the file COPYING. - - Neither the names of the U.S. Department of Energy nor the - University of California nor the names of its contributors may be - used to endorse or promote products derived from this software - without prior written permission. -*/ - -#include "libvex_basictypes.h" -#include "libvex.h" - -#include "main_util.h" -#include "host_generic_regs.h" - -/* Set to 1 for lots of debugging output. */ -#define DEBUG_REGALLOC 0 - - -/* TODO 27 Oct 04: - - Better consistency checking from what isMove tells us. - - We can possibly do V-V coalescing even when the src is spilled, - providing we can arrange for the dst to have the same spill slot. - - Note that state[].hreg is the same as the available real regs. - - Generally rationalise data structures. */ - - -/* Records information on virtual register live ranges. Computed once - and remains unchanged after that. */ -typedef - struct { - /* Becomes live for the first time after this insn ... */ - Short live_after; - /* Becomes dead for the last time before this insn ... */ - Short dead_before; - /* The "home" spill slot, if needed. Never changes. */ - Short spill_offset; - Short spill_size; - /* What kind of register this is. */ - HRegClass reg_class; - } - VRegLR; - - -/* Records information on real-register live ranges. Computed once - and remains unchanged after that. */ -typedef - struct { - HReg rreg; - /* Becomes live after this insn ... */ - Short live_after; - /* Becomes dead before this insn ... */ - Short dead_before; - } - RRegLR; - - -/* An array of the following structs (rreg_state) comprises the - running state of the allocator. 
It indicates what the current - disposition of each allocatable real register is. The array gets - updated as the allocator processes instructions. The identity of - the register is not recorded here, because the index of this - structure in doRegisterAllocation()'s |rreg_state| is the index - number of the register, and the register itself can be extracted - from the RRegUniverse supplied to doRegisterAllocation(). */ -typedef - struct { - /* ------ FIELDS WHICH DO NOT CHANGE ------ */ - /* Is this involved in any HLRs? (only an optimisation hint) */ - Bool has_hlrs; - /* ------ FIELDS WHICH DO CHANGE ------ */ - /* 6 May 07: rearranged fields below so the whole struct fits - into 16 bytes on both x86 and amd64. */ - /* Used when .disp == Bound and we are looking for vregs to - spill. */ - Bool is_spill_cand; - /* Optimisation: used when .disp == Bound. Indicates when the - rreg has the same value as the spill slot for the associated - vreg. Is safely left at False, and becomes True after a - spill store or reload for this rreg. */ - Bool eq_spill_slot; - /* What's it's current disposition? */ - enum { Free, /* available for use */ - Unavail, /* in a real-reg live range */ - Bound /* in use (holding value of some vreg) */ - } - disp; - /* If .disp == Bound, what vreg is it bound to? */ - HReg vreg; - } - RRegState; - - -/* The allocator also maintains a redundant array of indexes - (vreg_state) from vreg numbers back to entries in rreg_state. It - is redundant because iff vreg_state[i] == j then - hregNumber(rreg_state[j].vreg) == i -- that is, the two entries - point at each other. The purpose of this is to speed up activities - which involve looking for a particular vreg: there is no need to - scan the rreg_state looking for it, just index directly into - vreg_state. The FAQ "does this vreg already have an associated - rreg" is the main beneficiary. 
- - To indicate, in vreg_state[i], that a given vreg is not currently - associated with any rreg, that entry can be set to INVALID_RREG_NO. - - Because the vreg_state entries are signed Shorts, the max number - of vregs that can be handed by regalloc is 32767. -*/ - -#define INVALID_RREG_NO ((Short)(-1)) - -#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs) -#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs) - - -/* Search forward from some given point in the incoming instruction - sequence. Point is to select a virtual register to spill, by - finding the vreg which is mentioned as far ahead as possible, in - the hope that this will minimise the number of consequent reloads. - - Only do the search for vregs which are Bound in the running state, - and for which the .is_spill_cand field is set. This allows the - caller to arbitrarily restrict the set of spill candidates to be - considered. - - To do this we don't actually need to see the incoming instruction - stream. Rather, what we need us the HRegUsage records for the - incoming instruction stream. Hence that is passed in. - - Returns an index into the state array indicating the (v,r) pair to - spill, or -1 if none was found. */ -static -Int findMostDistantlyMentionedVReg ( - HRegUsage* reg_usages_in, - Int search_from_instr, - Int num_instrs, - RRegState* state, - Int n_state -) -{ - Int k, m; - Int furthest_k = -1; - Int furthest = -1; - vassert(search_from_instr >= 0); - for (k = 0; k < n_state; k++) { - if (!state[k].is_spill_cand) - continue; - vassert(state[k].disp == Bound); - for (m = search_from_instr; m < num_instrs; m++) { - if (HRegUsage__contains(®_usages_in[m], state[k].vreg)) - break; - } - if (m > furthest) { - furthest = m; - furthest_k = k; - } - } - return furthest_k; -} - - -/* Check that this vreg has been assigned a sane spill offset. 
*/ -inline -static void sanity_check_spill_offset ( VRegLR* vreg ) -{ - switch (vreg->reg_class) { - case HRcVec128: case HRcFlt64: - vassert(0 == ((UShort)vreg->spill_offset % 16)); break; - default: - vassert(0 == ((UShort)vreg->spill_offset % 8)); break; - } -} - - -/* Double the size of the real-reg live-range array, if needed. */ -__attribute__((noinline)) -static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used ) -{ - Int k; - RRegLR* arr2; - if (0) - vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size); - vassert(used == *size); - arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR)); - for (k = 0; k < *size; k++) - arr2[k] = (*info)[k]; - *size *= 2; - *info = arr2; -} -inline -static void ensureRRLRspace ( RRegLR** info, Int* size, Int used ) -{ - if (LIKELY(used < *size)) return; - ensureRRLRspace_SLOW(info, size, used); -} - - -/* Sort an array of RRegLR entries by either the .live_after or - .dead_before fields. This is performance-critical. */ -static void sortRRLRarray ( RRegLR* arr, - Int size, Bool by_live_after ) -{ - Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484 }; - Int lo = 0; - Int hi = size-1; - Int i, j, h, bigN, hp; - RRegLR v; - - vassert(size >= 0); - if (size == 0) - return; - - bigN = hi - lo + 1; if (bigN < 2) return; - hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--; - - if (by_live_after) { - - for ( ; hp >= 0; hp--) { - h = incs[hp]; - for (i = lo + h; i <= hi; i++) { - v = arr[i]; - j = i; - while (arr[j-h].live_after > v.live_after) { - arr[j] = arr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - arr[j] = v; - } - } - - } else { - - for ( ; hp >= 0; hp--) { - h = incs[hp]; - for (i = lo + h; i <= hi; i++) { - v = arr[i]; - j = i; - while (arr[j-h].dead_before > v.dead_before) { - arr[j] = arr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - arr[j] = v; - } - } - - } -} - - -/* Compute the index of the highest and lowest 1 in a ULong, 
- respectively. Results are undefined if the argument is zero. - Don't pass it zero :) */ -static inline UInt ULong__maxIndex ( ULong w64 ) { - return 63 - __builtin_clzll(w64); -} - -static inline UInt ULong__minIndex ( ULong w64 ) { - return __builtin_ctzll(w64); -} - - -/* A target-independent register allocator. Requires various - functions which it uses to deal abstractly with instructions and - registers, since it cannot have any target-specific knowledge. - - Returns a new list of instructions, which, as a result of the - behaviour of mapRegs, will be in-place modifications of the - original instructions. - - Requires that the incoming code has been generated using - vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside - that range is a checked run-time error. - - Takes an expandable array of pointers to unallocated insns. - Returns an expandable array of pointers to allocated insns. -*/ -HInstrArray* doRegisterAllocation_v2 ( - - /* Incoming virtual-registerised code. */ - HInstrArray* instrs_in, - - /* Register allocator controls to use. */ - const RegAllocControl* con -) -{ -# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) - - const Bool eq_spill_opt = True; - - /* Info on vregs and rregs. Computed once and remains - unchanged. */ - Int n_vregs; - VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */ - - /* We keep two copies of the real-reg live range info, one sorted - by .live_after and the other by .dead_before. First the - unsorted info is created in the _la variant is copied into the - _db variant. Once that's done both of them are sorted. - We also need two integer cursors which record the next - location in the two arrays to consider. */ - RRegLR* rreg_lrs_la; - RRegLR* rreg_lrs_db; - Int rreg_lrs_size; - Int rreg_lrs_used; - Int rreg_lrs_la_next; - Int rreg_lrs_db_next; - - /* Info on register usage in the incoming instruction array. - Computed once and remains unchanged, more or less; updated - sometimes by the direct-reload optimisation. 
*/ - HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */ - - /* Used when constructing vreg_lrs (for allocating stack - slots). */ - Short ss_busy_until_before[N_SPILL64S]; - - /* Used when constructing rreg_lrs. */ - Int* rreg_live_after; - Int* rreg_dead_before; - - /* Running state of the core allocation algorithm. */ - RRegState* rreg_state; /* [0 .. n_rregs-1] */ - Int n_rregs; - - /* .. and the redundant backward map */ - /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO. - This implies n_rregs must be <= 32768. */ - Short* vreg_state; /* [0 .. n_vregs-1] */ - - /* The vreg -> rreg map constructed and then applied to each - instr. */ - HRegRemap remap; - - /* The output array of instructions. */ - HInstrArray* instrs_out; - - /* Sanity checks are expensive. They are only done periodically, - not at each insn processed. */ - Bool do_sanity_check; - - vassert(0 == (con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN)); - vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN)); - vassert(0 == (N_SPILL64S % 2)); - - /* The live range numbers are signed shorts, and so limiting the - number of insns to 15000 comfortably guards against them - overflowing 32k. */ - vassert(instrs_in->arr_used <= 15000); - -# define INVALID_INSTRNO (-2) - -# define EMIT_INSTR(_instr) \ - do { \ - HInstr* _tmp = (_instr); \ - if (DEBUG_REGALLOC) { \ - vex_printf("** "); \ - con->ppInstr(_tmp, con->mode64); \ - vex_printf("\n\n"); \ - } \ - addHInstr ( instrs_out, _tmp ); \ - } while (0) - -# define PRINT_STATE \ - do { \ - Int z, q; \ - for (z = 0; z < n_rregs; z++) { \ - vex_printf(" rreg_state[%2d] = ", z); \ - con->ppReg(con->univ->regs[z]); \ - vex_printf(" \t"); \ - switch (rreg_state[z].disp) { \ - case Free: vex_printf("Free\n"); break; \ - case Unavail: vex_printf("Unavail\n"); break; \ - case Bound: vex_printf("BoundTo "); \ - con->ppReg(rreg_state[z].vreg); \ - vex_printf("\n"); break; \ - } \ - } \ - vex_printf("\n vreg_state[0 .. 
%d]:\n ", n_vregs-1); \ - q = 0; \ - for (z = 0; z < n_vregs; z++) { \ - if (vreg_state[z] == INVALID_RREG_NO) \ - continue; \ - vex_printf("[%d] -> %d ", z, vreg_state[z]); \ - q++; \ - if (q > 0 && (q % 6) == 0) \ - vex_printf("\n "); \ - } \ - vex_printf("\n"); \ - } while (0) - - - /* --------- Stage 0: set up output array --------- */ - /* --------- and allocate/initialise running state. --------- */ - - instrs_out = newHInstrArray(); - - /* ... and initialise running state. */ - /* n_rregs is no more than a short name for n_available_real_regs. */ - n_rregs = con->univ->allocable; - n_vregs = instrs_in->n_vregs; - - /* If this is not so, vreg_state entries will overflow. */ - vassert(n_vregs < 32767); - - /* If this is not so, the universe we have is nonsensical. */ - vassert(n_rregs > 0); - - rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState)); - vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short)); - - for (Int j = 0; j < n_rregs; j++) { - rreg_state[j].has_hlrs = False; - rreg_state[j].disp = Free; - rreg_state[j].vreg = INVALID_HREG; - rreg_state[j].is_spill_cand = False; - rreg_state[j].eq_spill_slot = False; - } - - for (Int j = 0; j < n_vregs; j++) - vreg_state[j] = INVALID_RREG_NO; - - - /* --------- Stage 1: compute vreg live ranges. --------- */ - /* --------- Stage 2: compute rreg live ranges. --------- */ - - /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */ - - /* This is relatively simple, because (1) we only seek the complete - end-to-end live range of each vreg, and are not interested in - any holes in it, and (2) the vregs are conveniently numbered 0 - .. n_vregs-1, so we can just dump the results in a - pre-allocated array. 
*/ - - vreg_lrs = NULL; - if (n_vregs > 0) - vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs); - - for (Int j = 0; j < n_vregs; j++) { - vreg_lrs[j].live_after = INVALID_INSTRNO; - vreg_lrs[j].dead_before = INVALID_INSTRNO; - vreg_lrs[j].spill_offset = 0; - vreg_lrs[j].spill_size = 0; - vreg_lrs[j].reg_class = HRcINVALID; - } - - /* An array to hold the reg-usage info for the incoming - instructions. */ - reg_usage_arr = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used); - - /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */ - - /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */ - - /* This is more complex than Stage 1, because we need to compute - exactly all the live ranges of all the allocatable real regs, - and we don't know in advance how many there will be. */ - - rreg_lrs_used = 0; - rreg_lrs_size = 4; - rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR)); - rreg_lrs_db = NULL; /* we'll create this later */ - - /* We'll need to track live range start/end points seperately for - each rreg. Sigh. */ - vassert(n_rregs > 0); - rreg_live_after = LibVEX_Alloc_inline(n_rregs * sizeof(Int)); - rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int)); - - for (Int j = 0; j < n_rregs; j++) { - rreg_live_after[j] = - rreg_dead_before[j] = INVALID_INSTRNO; - } - - /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */ - - /* ------ start of ITERATE OVER INSNS ------ */ - - for (Int ii = 0; ii < instrs_in->arr_used; ii++) { - - con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii], con->mode64); - - if (0) { - vex_printf("\n%d stage1: ", ii); - con->ppInstr(instrs_in->arr[ii], con->mode64); - vex_printf("\n"); - ppHRegUsage(con->univ, ®_usage_arr[ii]); - } - - /* ------ start of DEAL WITH VREG LIVE RANGES ------ */ - - /* for each virtual reg mentioned in the insn ... 
*/ - for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { - - HReg vreg = reg_usage_arr[ii].vRegs[j]; - vassert(hregIsVirtual(vreg)); - - Int k = hregIndex(vreg); - if (k < 0 || k >= n_vregs) { - vex_printf("\n"); - con->ppInstr(instrs_in->arr[ii], con->mode64); - vex_printf("\n"); - vex_printf("vreg %d, n_vregs %d\n", k, n_vregs); - vpanic("doRegisterAllocation: out-of-range vreg"); - } - - /* Take the opportunity to note its regclass. We'll need - that when allocating spill slots. */ - if (vreg_lrs[k].reg_class == HRcINVALID) { - /* First mention of this vreg. */ - vreg_lrs[k].reg_class = hregClass(vreg); - } else { - /* Seen it before, so check for consistency. */ - vassert(vreg_lrs[k].reg_class == hregClass(vreg)); - } - - /* Now consider live ranges. */ - switch (reg_usage_arr[ii].vMode[j]) { - case HRmRead: - if (vreg_lrs[k].live_after == INVALID_INSTRNO) { - vex_printf("\n\nOFFENDING VREG = %d\n", k); - vpanic("doRegisterAllocation: " - "first event for vreg is Read"); - } - vreg_lrs[k].dead_before = toShort(ii + 1); - break; - case HRmWrite: - if (vreg_lrs[k].live_after == INVALID_INSTRNO) - vreg_lrs[k].live_after = toShort(ii); - vreg_lrs[k].dead_before = toShort(ii + 1); - break; - case HRmModify: - if (vreg_lrs[k].live_after == INVALID_INSTRNO) { - vex_printf("\n\nOFFENDING VREG = %d\n", k); - vpanic("doRegisterAllocation: " - "first event for vreg is Modify"); - } - vreg_lrs[k].dead_before = toShort(ii + 1); - break; - default: - vpanic("doRegisterAllocation(1)"); - } /* switch */ - - } /* iterate over virtual registers */ - - /* ------ end of DEAL WITH VREG LIVE RANGES ------ */ - - /* ------ start of DEAL WITH RREG LIVE RANGES ------ */ - - /* If this doesn't hold, the following iteration over real registers - will fail miserably. 
*/ - vassert(N_RREGUNIVERSE_REGS == 64); - - const ULong rRead = reg_usage_arr[ii].rRead; - const ULong rWritten = reg_usage_arr[ii].rWritten; - const ULong rMentioned = rRead | rWritten; - - UInt rReg_minIndex; - UInt rReg_maxIndex; - if (rMentioned == 0) { - /* There are no real register uses in this insn. Set - rReg_{min,max}Index so that the following loop doesn't iterate - at all, so as to avoid wasting time. */ - rReg_minIndex = 1; - rReg_maxIndex = 0; - } else { - rReg_minIndex = ULong__minIndex(rMentioned); - rReg_maxIndex = ULong__maxIndex(rMentioned); - /* Don't bother to look at registers which are not available - to the allocator. We asserted above that n_rregs > 0, so - n_rregs-1 is safe. */ - if (rReg_maxIndex >= n_rregs) - rReg_maxIndex = n_rregs-1; - } - - /* for each allocator-available real reg mentioned in the insn ... */ - /* Note. We are allocating only over the real regs available to - the allocator. Others, eg the stack or baseblock pointers, - are unavailable to allocation and so we never visit them. - Hence the iteration is cut off at n_rregs-1, since n_rregs == - univ->allocable. */ - for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) { - - const ULong jMask = 1ULL << j; - if (LIKELY((rMentioned & jMask) == 0)) - continue; - - const Bool isR = (rRead & jMask) != 0; - const Bool isW = (rWritten & jMask) != 0; - - /* Dummy initialisations of flush_la and flush_db to avoid - possible bogus uninit-var warnings from gcc. 
*/ - Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO; - Bool flush = False; - - if (isW && !isR) { - flush_la = rreg_live_after[j]; - flush_db = rreg_dead_before[j]; - if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO) - flush = True; - rreg_live_after[j] = ii; - rreg_dead_before[j] = ii+1; - } else if (!isW && isR) { - if (rreg_live_after[j] == INVALID_INSTRNO) { - vex_printf("\nOFFENDING RREG = "); - con->ppReg(con->univ->regs[j]); - vex_printf("\n"); - vex_printf("\nOFFENDING instr = "); - con->ppInstr(instrs_in->arr[ii], con->mode64); - vex_printf("\n"); - vpanic("doRegisterAllocation: " - "first event for rreg is Read"); - } - rreg_dead_before[j] = ii+1; - } else { - vassert(isR && isW); - if (rreg_live_after[j] == INVALID_INSTRNO) { - vex_printf("\nOFFENDING RREG = "); - con->ppReg(con->univ->regs[j]); - vex_printf("\n"); - vex_printf("\nOFFENDING instr = "); - con->ppInstr(instrs_in->arr[ii], con->mode64); - vex_printf("\n"); - vpanic("doRegisterAllocation: " - "first event for rreg is Modify"); - } - rreg_dead_before[j] = ii+1; - } - - if (flush) { - vassert(flush_la != INVALID_INSTRNO); - vassert(flush_db != INVALID_INSTRNO); - ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); - if (0) - vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db); - rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j]; - rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la); - rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db); - rreg_lrs_used++; - } - - } /* iterate over rregs in the instr */ - - /* ------ end of DEAL WITH RREG LIVE RANGES ------ */ - - } /* iterate over insns */ - - /* ------ end of ITERATE OVER INSNS ------ */ - - /* ------ start of FINALISE RREG LIVE RANGES ------ */ - - /* Now finish up any live ranges left over. 
*/ - for (Int j = 0; j < n_rregs; j++) { - - if (0) { - vex_printf("residual %d: %d %d\n", j, rreg_live_after[j], - rreg_dead_before[j]); - } - vassert( (rreg_live_after[j] == INVALID_INSTRNO - && rreg_dead_before[j] == INVALID_INSTRNO) - || - (rreg_live_after[j] != INVALID_INSTRNO - && rreg_dead_before[j] != INVALID_INSTRNO) - ); - - if (rreg_live_after[j] == INVALID_INSTRNO) - continue; - - ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); - if (0) - vex_printf("FLUSH 2 (%d,%d)\n", - rreg_live_after[j], rreg_dead_before[j]); - rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j]; - rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]); - rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]); - rreg_lrs_used++; - } - - /* Compute summary hints for choosing real regs. If a real reg is - involved in a hard live range, record that fact in the fixed - part of the running rreg_state. Later, when offered a choice between - rregs, it's better to choose one which is not marked as having - any HLRs, since ones with HLRs may need to be spilled around - their HLRs. Correctness of final assignment is unaffected by - this mechanism -- it is only an optimisation. */ - - for (Int j = 0; j < rreg_lrs_used; j++) { - HReg rreg = rreg_lrs_la[j].rreg; - vassert(!hregIsVirtual(rreg)); - /* rreg is involved in a HLR. Record this info in the array, if - there is space. */ - UInt ix = hregIndex(rreg); - vassert(ix < n_rregs); - rreg_state[ix].has_hlrs = True; - } - if (0) { - for (Int j = 0; j < n_rregs; j++) { - if (!rreg_state[j].has_hlrs) - continue; - con->ppReg(con->univ->regs[j]); - vex_printf(" hinted\n"); - } - } - - /* Finally, copy the _la variant into the _db variant and - sort both by their respective fields. 
*/ - rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR)); - for (Int j = 0; j < rreg_lrs_used; j++) - rreg_lrs_db[j] = rreg_lrs_la[j]; - - sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ ); - sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ ); - - /* And set up the cursors. */ - rreg_lrs_la_next = 0; - rreg_lrs_db_next = 0; - - for (Int j = 1; j < rreg_lrs_used; j++) { - vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after); - vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before); - } - - /* ------ end of FINALISE RREG LIVE RANGES ------ */ - - if (DEBUG_REGALLOC) { - for (Int j = 0; j < n_vregs; j++) { - vex_printf("vreg %d: la = %d, db = %d\n", - j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before ); - } - } - - if (DEBUG_REGALLOC) { - vex_printf("RRegLRs by LA:\n"); - for (Int j = 0; j < rreg_lrs_used; j++) { - vex_printf(" "); - con->ppReg(rreg_lrs_la[j].rreg); - vex_printf(" la = %d, db = %d\n", - rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before ); - } - vex_printf("RRegLRs by DB:\n"); - for (Int j = 0; j < rreg_lrs_used; j++) { - vex_printf(" "); - con->ppReg(rreg_lrs_db[j].rreg); - vex_printf(" la = %d, db = %d\n", - rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before ); - } - } - - /* --------- Stage 3: allocate spill slots. --------- */ - - /* Each spill slot is 8 bytes long. For vregs which take more than - 64 bits to spill (classes Flt64 and Vec128), we have to allocate - two consecutive spill slots. For 256 bit registers (class - Vec256), we have to allocate four consecutive spill slots. - - For Vec128-class on PowerPC, the spill slot's actual address - must be 16-byte aligned. Since the spill slot's address is - computed as an offset from the guest state pointer, and since - the user of the generated code must set that pointer to a - 32-aligned value, we have the residual obligation here of - choosing a 16-aligned spill slot offset for Vec128-class values. 
- Since each spill slot is 8 bytes long, that means for - Vec128-class values we must allocated a spill slot number which - is zero mod 2. - - Similarly, for Vec256 class on amd64, find a spill slot number - which is zero mod 4. This guarantees it will be 32 byte - aligned, which isn't actually necessary on amd64 (we use movUpd - etc to spill), but seems like good practice. - - Do a rank-based allocation of vregs to spill slot numbers. We - put as few values as possible in spill slots, but nevertheless - need to have a spill slot available for all vregs, just in case. - */ - /* Int max_ss_no = -1; */ - - vex_bzero(ss_busy_until_before, sizeof(ss_busy_until_before)); - - for (Int j = 0; j < n_vregs; j++) { - - /* True iff this vreg is unused. In which case we also expect - that the reg_class field for it has not been set. */ - if (vreg_lrs[j].live_after == INVALID_INSTRNO) { - vassert(vreg_lrs[j].reg_class == HRcINVALID); - continue; - } - - /* The spill slots are 64 bits in size. As per the comment on - definition of HRegClass in host_generic_regs.h, that means, - to spill a vreg of class Flt64 or Vec128, we'll need to find - two adjacent spill slots to use. For Vec256, we'll need to - find four adjacent slots to use. Note, this logic needs to - kept in sync with the size info on the definition of - HRegClass. */ - Int ss_no = -1; - switch (vreg_lrs[j].reg_class) { - - case HRcVec128: case HRcFlt64: - /* Find two adjacent free slots in which between them - provide up to 128 bits in which to spill the vreg. - Since we are trying to find an even:odd pair, move - along in steps of 2 (slots). */ - for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2) - if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after - && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after) - break; - if (ss_no >= N_SPILL64S-1) { - vpanic("LibVEX_N_SPILL_BYTES is too low. 
" - "Increase and recompile."); - } - ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before; - ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before; - break; - - default: - /* The ordinary case -- just find a single spill slot. */ - /* Find the lowest-numbered spill slot which is available - at the start point of this interval, and assign the - interval to it. */ - for (ss_no = 0; ss_no < N_SPILL64S; ss_no++) - if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after) - break; - if (ss_no == N_SPILL64S) { - vpanic("LibVEX_N_SPILL_BYTES is too low. " - "Increase and recompile."); - } - ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before; - break; - - } /* switch (vreg_lrs[j].reg_class) */ - - /* This reflects LibVEX's hard-wired knowledge of the baseBlock - layout: the guest state, then two equal sized areas following - it for two sets of shadow state, and then the spill area. */ - vreg_lrs[j].spill_offset = toShort(con->guest_sizeB * 3 + ss_no * 8); - - /* Independent check that we've made a sane choice of slot */ - sanity_check_spill_offset( &vreg_lrs[j] ); - /* if (j > max_ss_no) */ - /* max_ss_no = j; */ - } - - if (0) { - vex_printf("\n\n"); - for (Int j = 0; j < n_vregs; j++) - vex_printf("vreg %d --> spill offset %d\n", - j, vreg_lrs[j].spill_offset); - } - - /* --------- Stage 4: establish rreg preferences --------- */ - - /* It may be advantageous to allocating certain vregs to specific - rregs, as a way of avoiding reg-reg moves later. Here we - establish which, if any, rreg each vreg would prefer to be in. - Note that this constrains the allocator -- ideally we end up - with as few as possible vregs expressing a preference. - - This is an optimisation: if the .preferred_rreg field is never - set to anything different from INVALID_HREG, the allocator still - works. */ - - /* 30 Dec 04: removed this mechanism as it does not seem to - help. */ - - /* --------- Stage 5: process instructions --------- */ - - /* This is the main loop of the allocator. 
First, we need to - correctly set up our running state, which tracks the status of - each real register. */ - - /* ------ BEGIN: Process each insn in turn. ------ */ - - for (Int ii = 0; ii < instrs_in->arr_used; ii++) { - - if (DEBUG_REGALLOC) { - vex_printf("\n====----====---- Insn %d ----====----====\n", ii); - vex_printf("---- "); - con->ppInstr(instrs_in->arr[ii], con->mode64); - vex_printf("\n\nInitial state:\n"); - PRINT_STATE; - vex_printf("\n"); - } - - /* ------------ Sanity checks ------------ */ - - /* Sanity checks are expensive. So they are done only once - every 17 instructions, and just before the last - instruction. */ - do_sanity_check - = toBool( - False /* Set to True for sanity checking of all insns. */ - || ii == instrs_in->arr_used-1 - || (ii > 0 && (ii % 17) == 0) - ); - - if (do_sanity_check) { - - /* Sanity check 1: all rregs with a hard live range crossing - this insn must be marked as unavailable in the running - state. */ - for (Int j = 0; j < rreg_lrs_used; j++) { - if (rreg_lrs_la[j].live_after < ii - && ii < rreg_lrs_la[j].dead_before) { - /* ii is the middle of a hard live range for some real - reg. Check it's marked as such in the running - state. */ - HReg reg = rreg_lrs_la[j].rreg; - - if (0) { - vex_printf("considering la %d .. db %d reg = ", - rreg_lrs_la[j].live_after, - rreg_lrs_la[j].dead_before); - con->ppReg(reg); - vex_printf("\n"); - } - - /* assert that this rreg is marked as unavailable */ - vassert(!hregIsVirtual(reg)); - vassert(rreg_state[hregIndex(reg)].disp == Unavail); - } - } - - /* Sanity check 2: conversely, all rregs marked as - unavailable in the running rreg_state must have a - corresponding hard live range entry in the rreg_lrs - array. 
*/ - for (Int j = 0; j < n_rregs; j++) { - vassert(rreg_state[j].disp == Bound - || rreg_state[j].disp == Free - || rreg_state[j].disp == Unavail); - if (rreg_state[j].disp != Unavail) - continue; - Int k; - for (k = 0; k < rreg_lrs_used; k++) { - HReg reg = rreg_lrs_la[k].rreg; - vassert(!hregIsVirtual(reg)); - if (hregIndex(reg) == j - && rreg_lrs_la[k].live_after < ii - && ii < rreg_lrs_la[k].dead_before) - break; - } - /* If this vassertion fails, we couldn't find a - corresponding HLR. */ - vassert(k < rreg_lrs_used); - } - - /* Sanity check 3: all vreg-rreg bindings must bind registers - of the same class. */ - for (Int j = 0; j < n_rregs; j++) { - if (rreg_state[j].disp != Bound) { - vassert(rreg_state[j].eq_spill_slot == False); - continue; - } - vassert(hregClass(con->univ->regs[j]) - == hregClass(rreg_state[j].vreg)); - vassert( hregIsVirtual(rreg_state[j].vreg)); - } - - /* Sanity check 4: the vreg_state and rreg_state - mutually-redundant mappings are consistent. If - rreg_state[j].vreg points at some vreg_state entry then - that vreg_state entry should point back at - rreg_state[j]. */ - for (Int j = 0; j < n_rregs; j++) { - if (rreg_state[j].disp != Bound) - continue; - Int k = hregIndex(rreg_state[j].vreg); - vassert(IS_VALID_VREGNO(k)); - vassert(vreg_state[k] == j); - } - for (Int j = 0; j < n_vregs; j++) { - Int k = vreg_state[j]; - if (k == INVALID_RREG_NO) - continue; - vassert(IS_VALID_RREGNO(k)); - vassert(rreg_state[k].disp == Bound); - vassert(hregIndex(rreg_state[k].vreg) == j); - } - - } /* if (do_sanity_check) */ - - /* ------------ end of Sanity checks ------------ */ - - /* Do various optimisations pertaining to register coalescing - and preferencing: - MOV v <-> v coalescing (done here). - MOV v <-> r coalescing (not yet, if ever) - */ - /* If doing a reg-reg move between two vregs, and the src's live - range ends here and the dst's live range starts here, bind - the dst to the src's rreg, and that's all. 
*/ - HReg vregS = INVALID_HREG; - HReg vregD = INVALID_HREG; - if ( con->isMove(instrs_in->arr[ii], &vregS, &vregD) ) { - if (!hregIsVirtual(vregS)) goto cannot_coalesce; - if (!hregIsVirtual(vregD)) goto cannot_coalesce; - /* Check that *isMove is not telling us a bunch of lies ... */ - vassert(hregClass(vregS) == hregClass(vregD)); - Int k = hregIndex(vregS); - Int m = hregIndex(vregD); - vassert(IS_VALID_VREGNO(k)); - vassert(IS_VALID_VREGNO(m)); - if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce; - if (vreg_lrs[m].live_after != ii) goto cannot_coalesce; - if (DEBUG_REGALLOC) { - vex_printf("COALESCE "); - con->ppReg(vregS); - vex_printf(" -> "); - con->ppReg(vregD); - vex_printf("\n\n"); - } - /* Find the state entry for vregS. */ - Int n = vreg_state[k]; /* k is the index of vregS */ - if (n == INVALID_RREG_NO) { - /* vregS is not currently in a real register. So we can't - do the coalescing. Give up. */ - goto cannot_coalesce; - } - vassert(IS_VALID_RREGNO(n)); - - /* Finally, we can do the coalescing. It's trivial -- merely - claim vregS's register for vregD. */ - rreg_state[n].vreg = vregD; - vassert(IS_VALID_VREGNO(hregIndex(vregD))); - vassert(IS_VALID_VREGNO(hregIndex(vregS))); - vreg_state[hregIndex(vregD)] = toShort(n); - vreg_state[hregIndex(vregS)] = INVALID_RREG_NO; - - /* This rreg has become associated with a different vreg and - hence with a different spill slot. Play safe. */ - rreg_state[n].eq_spill_slot = False; - - /* Move on to the next insn. We skip the post-insn stuff for - fixed registers, since this move should not interact with - them in any way. */ - continue; - } - cannot_coalesce: - - /* ------ Free up rregs bound to dead vregs ------ */ - - /* Look for vregs whose live range has just ended, and - mark the associated rreg as free. 
*/ - - for (Int j = 0; j < n_rregs; j++) { - if (rreg_state[j].disp != Bound) - continue; - UInt vregno = hregIndex(rreg_state[j].vreg); - vassert(IS_VALID_VREGNO(vregno)); - if (vreg_lrs[vregno].dead_before <= ii) { - rreg_state[j].disp = Free; - rreg_state[j].eq_spill_slot = False; - Int m = hregIndex(rreg_state[j].vreg); - vassert(IS_VALID_VREGNO(m)); - vreg_state[m] = INVALID_RREG_NO; - if (DEBUG_REGALLOC) { - vex_printf("free up "); - con->ppReg(con->univ->regs[j]); - vex_printf("\n"); - } - } - } - - /* ------ Pre-instruction actions for fixed rreg uses ------ */ - - /* Now we have to deal with rregs which are about to be made - live by this instruction -- in other words, are entering into - one of their live ranges. If any such rreg holds a vreg, we - will have to free up the rreg. The simplest solution which - is correct is to spill the rreg. - - Note we could do better: - * Could move it into some other free rreg, if one is available - - Do this efficiently, by incrementally stepping along an array - of rreg HLRs that are known to be sorted by start point - (their .live_after field). - */ - while (True) { - vassert(rreg_lrs_la_next >= 0); - vassert(rreg_lrs_la_next <= rreg_lrs_used); - if (rreg_lrs_la_next == rreg_lrs_used) - break; /* no more real reg live ranges to consider */ - if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after) - break; /* next live range does not yet start */ - vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after); - /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up. - Find the associated rreg_state entry. */ - /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after. - Real register live ranges are guaranteed to be well-formed - in that they start with a write to the register -- Stage 2 - rejects any code not satisfying this. So the correct - question to ask is whether - rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is, - whether the reg becomes live after this insn -- rather - than before it. 
*/ - if (DEBUG_REGALLOC) { - vex_printf("need to free up rreg: "); - con->ppReg(rreg_lrs_la[rreg_lrs_la_next].rreg); - vex_printf("\n\n"); - } - Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg); - - /* If this fails, we don't have an entry for this rreg. - Which we should. */ - vassert(IS_VALID_RREGNO(k)); - Int m = hregIndex(rreg_state[k].vreg); - if (rreg_state[k].disp == Bound) { - /* Yes, there is an associated vreg. Spill it if it's - still live. */ - vassert(IS_VALID_VREGNO(m)); - vreg_state[m] = INVALID_RREG_NO; - if (vreg_lrs[m].dead_before > ii) { - vassert(vreg_lrs[m].reg_class != HRcINVALID); - if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) { - HInstr* spill1 = NULL; - HInstr* spill2 = NULL; - con->genSpill(&spill1, &spill2, con->univ->regs[k], - vreg_lrs[m].spill_offset, con->mode64); - vassert(spill1 || spill2); /* can't both be NULL */ - if (spill1) - EMIT_INSTR(spill1); - if (spill2) - EMIT_INSTR(spill2); - } - rreg_state[k].eq_spill_slot = True; - } - } - rreg_state[k].disp = Unavail; - rreg_state[k].vreg = INVALID_HREG; - rreg_state[k].eq_spill_slot = False; - - /* check for further rregs entering HLRs at this point */ - rreg_lrs_la_next++; - } - - if (DEBUG_REGALLOC) { - vex_printf("After pre-insn actions for fixed regs:\n"); - PRINT_STATE; - vex_printf("\n"); - } - - /* ------ Deal with the current instruction. ------ */ - - /* Finally we can begin the processing of this instruction - itself. The aim is to free up enough rregs for this insn. - This may generate spill stores since we may have to evict - some vregs currently in rregs. Also generates spill loads. - We also build up the final vreg->rreg mapping to be applied - to the insn. 
*/ - - initHRegRemap(&remap); - - /* ------------ BEGIN directReload optimisation ----------- */ - - /* If the instruction reads exactly one vreg which is currently - in a spill slot, and this is last use of that vreg, see if we - can convert the instruction into one that reads directly from - the spill slot. This is clearly only possible for x86 and - amd64 targets, since ppc and arm are load-store - architectures. If successful, replace instrs_in->arr[ii] - with this new instruction, and recompute its reg usage, so - that the change is invisible to the standard-case handling - that follows. */ - - if (con->directReload != NULL && reg_usage_arr[ii].n_vRegs <= 2) { - Bool debug_direct_reload = False; - HReg cand = INVALID_HREG; - Bool nreads = 0; - Short spilloff = 0; - - for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { - - HReg vreg = reg_usage_arr[ii].vRegs[j]; - vassert(hregIsVirtual(vreg)); - - if (reg_usage_arr[ii].vMode[j] == HRmRead) { - nreads++; - Int m = hregIndex(vreg); - vassert(IS_VALID_VREGNO(m)); - Int k = vreg_state[m]; - if (!IS_VALID_RREGNO(k)) { - /* ok, it is spilled. Now, is this its last use? */ - vassert(vreg_lrs[m].dead_before >= ii+1); - if (vreg_lrs[m].dead_before == ii+1 - && hregIsInvalid(cand)) { - spilloff = vreg_lrs[m].spill_offset; - cand = vreg; - } - } - } - } - - if (nreads == 1 && ! hregIsInvalid(cand)) { - HInstr* reloaded; - if (reg_usage_arr[ii].n_vRegs == 2) - vassert(! sameHReg(reg_usage_arr[ii].vRegs[0], - reg_usage_arr[ii].vRegs[1])); - - reloaded = con->directReload(instrs_in->arr[ii], cand, spilloff); - if (debug_direct_reload && !reloaded) { - vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" "); - con->ppInstr(instrs_in->arr[ii], con->mode64); - } - if (reloaded) { - /* Update info about the insn, so it looks as if it had - been in this form all along. 
*/ - instrs_in->arr[ii] = reloaded; - con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii], - con->mode64); - if (debug_direct_reload && !reloaded) { - vex_printf(" --> "); - con->ppInstr(reloaded, con->mode64); - } - } - - if (debug_direct_reload && !reloaded) - vex_printf("\n"); - } - - } - - /* ------------ END directReload optimisation ------------ */ - - /* for each virtual reg mentioned in the insn ... */ - for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { - - HReg vreg = reg_usage_arr[ii].vRegs[j]; - vassert(hregIsVirtual(vreg)); - - if (0) { - vex_printf("considering "); con->ppReg(vreg); vex_printf("\n"); - } - - /* Now we're trying to find a rreg for "vreg". First of all, - if it already has an rreg assigned, we don't need to do - anything more. Inspect the current state to find out. */ - Int m = hregIndex(vreg); - vassert(IS_VALID_VREGNO(m)); - Int n = vreg_state[m]; - if (IS_VALID_RREGNO(n)) { - vassert(rreg_state[n].disp == Bound); - addToHRegRemap(&remap, vreg, con->univ->regs[n]); - /* If this rreg is written or modified, mark it as different - from any spill slot value. */ - if (reg_usage_arr[ii].vMode[j] != HRmRead) - rreg_state[n].eq_spill_slot = False; - continue; - } else { - vassert(n == INVALID_RREG_NO); - } - - /* No luck. The next thing to do is see if there is a - currently free rreg available, of the correct class. If - so, bag it. NOTE, we could improve this by selecting an - rreg for which the next live-range event is as far ahead - as possible. */ - Int k_suboptimal = -1; - Int k; - for (k = 0; k < n_rregs; k++) { - if (rreg_state[k].disp != Free - || hregClass(con->univ->regs[k]) != hregClass(vreg)) - continue; - if (rreg_state[k].has_hlrs) { - /* Well, at least we can use k_suboptimal if we really - have to. Keep on looking for a better candidate. */ - k_suboptimal = k; - } else { - /* Found a preferable reg. Use it. 
*/ - k_suboptimal = -1; - break; - } - } - if (k_suboptimal >= 0) - k = k_suboptimal; - - if (k < n_rregs) { - rreg_state[k].disp = Bound; - rreg_state[k].vreg = vreg; - Int p = hregIndex(vreg); - vassert(IS_VALID_VREGNO(p)); - vreg_state[p] = toShort(k); - addToHRegRemap(&remap, vreg, con->univ->regs[k]); - /* Generate a reload if needed. This only creates needed - reloads because the live range builder for vregs will - guarantee that the first event for a vreg is a write. - Hence, if this reference is not a write, it cannot be - the first reference for this vreg, and so a reload is - indeed needed. */ - if (reg_usage_arr[ii].vMode[j] != HRmWrite) { - vassert(vreg_lrs[p].reg_class != HRcINVALID); - HInstr* reload1 = NULL; - HInstr* reload2 = NULL; - con->genReload(&reload1, &reload2, con->univ->regs[k], - vreg_lrs[p].spill_offset, con->mode64); - vassert(reload1 || reload2); /* can't both be NULL */ - if (reload1) - EMIT_INSTR(reload1); - if (reload2) - EMIT_INSTR(reload2); - /* This rreg is read or modified by the instruction. - If it's merely read we can claim it now equals the - spill slot, but not so if it is modified. */ - if (reg_usage_arr[ii].vMode[j] == HRmRead) { - rreg_state[k].eq_spill_slot = True; - } else { - vassert(reg_usage_arr[ii].vMode[j] == HRmModify); - rreg_state[k].eq_spill_slot = False; - } - } else { - rreg_state[k].eq_spill_slot = False; - } - - continue; - } - - /* Well, now we have no option but to spill a vreg. It's - important to make a good choice of vreg to spill, and of - course we need to be careful not to spill a vreg which is - needed by this insn. */ - - /* First, mark in the rreg_state, those rregs which are not spill - candidates, due to holding a vreg mentioned by this - instruction. Or being of the wrong class. 
*/ - for (k = 0; k < n_rregs; k++) { - rreg_state[k].is_spill_cand = False; - if (rreg_state[k].disp != Bound) - continue; - if (hregClass(con->univ->regs[k]) != hregClass(vreg)) - continue; - rreg_state[k].is_spill_cand = True; - /* Note, the following loop visits only the virtual regs - mentioned by the instruction. */ - for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) { - if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) { - rreg_state[k].is_spill_cand = False; - break; - } - } - } - - /* We can choose to spill any rreg satisfying - rreg_state[r].is_spill_cand (so to speak). Choose r so that - the next use of its associated vreg is as far ahead as - possible, in the hope that this will minimise the number - of consequent reloads required. */ - Int spillee - = findMostDistantlyMentionedVReg ( - reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state, n_rregs ); - - if (spillee == -1) { - /* Hmmmmm. There don't appear to be any spill candidates. - We're hosed. */ - vex_printf("reg_alloc: can't find a register in class: "); - ppHRegClass(hregClass(vreg)); - vex_printf("\n"); - vpanic("reg_alloc: can't create a free register."); - } - - /* Right. So we're going to spill rreg_state[spillee]. */ - vassert(IS_VALID_RREGNO(spillee)); - vassert(rreg_state[spillee].disp == Bound); - /* check it's the right class */ - vassert(hregClass(con->univ->regs[spillee]) == hregClass(vreg)); - /* check we're not ejecting the vreg for which we are trying - to free up a register. */ - vassert(! sameHReg(rreg_state[spillee].vreg, vreg)); - - m = hregIndex(rreg_state[spillee].vreg); - vassert(IS_VALID_VREGNO(m)); - - /* So here's the spill store. Assert that we're spilling a - live vreg. 
*/ - vassert(vreg_lrs[m].dead_before > ii); - vassert(vreg_lrs[m].reg_class != HRcINVALID); - if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) { - HInstr* spill1 = NULL; - HInstr* spill2 = NULL; - con->genSpill(&spill1, &spill2, con->univ->regs[spillee], - vreg_lrs[m].spill_offset, con->mode64); - vassert(spill1 || spill2); /* can't both be NULL */ - if (spill1) - EMIT_INSTR(spill1); - if (spill2) - EMIT_INSTR(spill2); - } - - /* Update the rreg_state to reflect the new assignment for this - rreg. */ - rreg_state[spillee].vreg = vreg; - vreg_state[m] = INVALID_RREG_NO; - - rreg_state[spillee].eq_spill_slot = False; /* be safe */ - - m = hregIndex(vreg); - vassert(IS_VALID_VREGNO(m)); - vreg_state[m] = toShort(spillee); - - /* Now, if this vreg is being read or modified (as opposed to - written), we have to generate a reload for it. */ - if (reg_usage_arr[ii].vMode[j] != HRmWrite) { - vassert(vreg_lrs[m].reg_class != HRcINVALID); - HInstr* reload1 = NULL; - HInstr* reload2 = NULL; - con->genReload(&reload1, &reload2, con->univ->regs[spillee], - vreg_lrs[m].spill_offset, con->mode64); - vassert(reload1 || reload2); /* can't both be NULL */ - if (reload1) - EMIT_INSTR(reload1); - if (reload2) - EMIT_INSTR(reload2); - /* This rreg is read or modified by the instruction. - If it's merely read we can claim it now equals the - spill slot, but not so if it is modified. */ - if (reg_usage_arr[ii].vMode[j] == HRmRead) { - rreg_state[spillee].eq_spill_slot = True; - } else { - vassert(reg_usage_arr[ii].vMode[j] == HRmModify); - rreg_state[spillee].eq_spill_slot = False; - } - } - - /* So after much twisting and turning, we have vreg mapped to - rreg_state[spillee].rreg. Note that in the map. */ - addToHRegRemap(&remap, vreg, con->univ->regs[spillee]); - - } /* iterate over virtual registers in this instruction. */ - - /* We've finished clowning around with registers in this instruction. 
- Three results: - - the running rreg_state[] has been updated - - a suitable vreg->rreg mapping for this instruction has been - constructed - - spill and reload instructions may have been emitted. - - The final step is to apply the mapping to the instruction, - and emit that. - */ - - /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */ - con->mapRegs(&remap, instrs_in->arr[ii], con->mode64); - EMIT_INSTR( instrs_in->arr[ii] ); - - if (DEBUG_REGALLOC) { - vex_printf("After dealing with current insn:\n"); - PRINT_STATE; - vex_printf("\n"); - } - - /* ------ Post-instruction actions for fixed rreg uses ------ */ - - /* Now we need to check for rregs exiting fixed live ranges - after this instruction, and if so mark them as free. */ - while (True) { - vassert(rreg_lrs_db_next >= 0); - vassert(rreg_lrs_db_next <= rreg_lrs_used); - if (rreg_lrs_db_next == rreg_lrs_used) - break; /* no more real reg live ranges to consider */ - if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before) - break; /* next live range does not yet start */ - vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before); - /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live - range. Mark it as such in the main rreg_state array. */ - HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg; - vassert(!hregIsVirtual(reg)); - Int k = hregIndex(reg); - vassert(IS_VALID_RREGNO(k)); - vassert(rreg_state[k].disp == Unavail); - rreg_state... [truncated message content] |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:27:50
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=454be2ee1059086c15d188610fa30d99a09b3bbf commit 454be2ee1059086c15d188610fa30d99a09b3bbf Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 14:12:39 2017 +0200 Support If-Then-Else in Memcheck. Diff: --- memcheck/mc_translate.c | 791 +++++++++++++++++++++++++++--------------------- 1 file changed, 453 insertions(+), 338 deletions(-) diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c index 980c1d7..f5a9830 100644 --- a/memcheck/mc_translate.c +++ b/memcheck/mc_translate.c @@ -174,30 +174,10 @@ typedef TempMapEnt; -/* Carries around state during memcheck instrumentation. */ typedef - struct _MCEnv { - /* MODIFIED: the superblock being constructed. IRStmts are - added. */ - IRSB* sb; + struct _MCEnvSettings { Bool trace; - /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the - current kind and possibly shadow temps for each temp in the - IRSB being constructed. Note that it does not contain the - type of each tmp. If you want to know the type, look at the - relevant entry in sb->tyenv. It follows that at all times - during the instrumentation process, the valid indices for - tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is - total number of Orig, V- and B- temps allocated so far. - - The reason for this strange split (types in one place, all - other info in another) is that we need the types to be - attached to sb so as to make it possible to do - "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the - instrumentation process. */ - XArray* /* of TempMapEnt */ tmpMap; - /* MODIFIED: indicates whether "bogus" literals have so far been found. Starts off False, and may change to True. */ Bool bogusLiterals; @@ -217,6 +197,37 @@ typedef Ity_I32 or Ity_I64 only. */ IRType hWordTy; } + MCEnvSettings; + +/* Carries around state corresponding to one IRStmtVec during Memcheck + instrumentation. */ +typedef + struct _MCEnv { + /* MODIFIED: the stmts being constructed. IRStmts are added. 
*/ + IRStmtVec* stmts; + IRTypeEnv* tyenv; + UInt depth; /* for indenting properly nested statements */ + + /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the + current kind and possibly shadow temps for each temp in the + IRStmtVec being constructed. Note that it does not contain the + type of each tmp. If you want to know the type, look at the + relevant entry in tyenv. It follows that at all times + during the instrumentation process, the valid indices for + tmpMap and tyenv are identical, being 0 .. N-1 where N is + total number of Orig, V- and B- temps allocated so far. + + The reason for this strange split (types in one place, all + other info in another) is that we need the types to be + attached to sb so as to make it possible to do + "typeOfIRExpr(mce->tyenv, ...)" at various places in the + instrumentation process. */ + XArray* /* of TempMapEnt */ tmpMap; + + struct _MCEnv* parent; + + MCEnvSettings* settings; + } MCEnv; /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on @@ -251,12 +262,12 @@ static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind ) { Word newIx; TempMapEnt ent; - IRTemp tmp = newIRTemp(mce->sb->tyenv, ty); + IRTemp tmp = newIRTemp(mce->tyenv, mce->stmts, ty); ent.kind = kind; ent.shadowV = IRTemp_INVALID; ent.shadowB = IRTemp_INVALID; newIx = VG_(addToXA)( mce->tmpMap, &ent ); - tl_assert(newIx == (Word)tmp); + tl_assert(newIx == tmp); return tmp; } @@ -265,17 +276,14 @@ static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind ) so far exists, allocate one. */ static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig ) { - TempMapEnt* ent; - /* VG_(indexXA) range-checks 'orig', hence no need to check - here. */ - ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); + /* VG_(indexXA) range-checks 'orig', hence no need to check here. 
*/ + TempMapEnt* ent = (TempMapEnt*) VG_(indexXA)(mce->tmpMap, orig); tl_assert(ent->kind == Orig); if (ent->shadowV == IRTemp_INVALID) { - IRTemp tmpV - = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh ); + IRTemp tmpV = newTemp(mce, shadowTypeV(mce->tyenv->types[orig]), VSh); /* newTemp may cause mce->tmpMap to resize, hence previous results from VG_(indexXA) are invalid. */ - ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); + ent = (TempMapEnt*) VG_(indexXA)(mce->tmpMap, orig); tl_assert(ent->kind == Orig); tl_assert(ent->shadowV == IRTemp_INVALID); ent->shadowV = tmpV; @@ -295,22 +303,58 @@ static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig ) regardless. */ static void newShadowTmpV ( MCEnv* mce, IRTemp orig ) { - TempMapEnt* ent; /* VG_(indexXA) range-checks 'orig', hence no need to check here. */ - ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); + TempMapEnt* ent = (TempMapEnt*) VG_(indexXA)(mce->tmpMap, orig); tl_assert(ent->kind == Orig); if (1) { - IRTemp tmpV - = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh ); + IRTemp tmpV = newTemp(mce, shadowTypeV(mce->tyenv->types[orig]), VSh); /* newTemp may cause mce->tmpMap to resize, hence previous results from VG_(indexXA) are invalid. */ - ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); + ent = (TempMapEnt*) VG_(indexXA)(mce->tmpMap, orig); tl_assert(ent->kind == Orig); ent->shadowV = tmpV; } } +/* Set up the running environment. Both .stmts and .tmpMap are modified as we go + along. Note that tmps are added to both .tyenv and .tmpMap together, so the + valid index-set for those two arrays should always be identical. */ +static void initMCEnv(IRTypeEnv* tyenv, IRStmtVec* stmts_in, MCEnv* mce, + MCEnv* parent_mce) +{ + IRStmtVec* stmts_out = emptyIRStmtVec(); + stmts_out->parent = (parent_mce != NULL) ? 
parent_mce->stmts : NULL; + stmts_out->id = stmts_in->id; + stmts_out->defset = deepCopyIRTempDefSet(stmts_in->defset); + + mce->stmts = stmts_out; + mce->tyenv = tyenv; + mce->depth = (parent_mce != NULL) ? parent_mce->depth + 1 : 0; + mce->parent = parent_mce; + mce->settings = (parent_mce != NULL) ? parent_mce->settings : NULL; + + mce->tmpMap = VG_(newXA)(VG_(malloc), "mc.createMCEnv.1", VG_(free), + sizeof(TempMapEnt)); + VG_(hintSizeXA)(mce->tmpMap, mce->tyenv->used); + for (UInt i = 0; i < mce->tyenv->used; i++) { + TempMapEnt ent; + ent.kind = Orig; + ent.shadowV = IRTemp_INVALID; + ent.shadowB = IRTemp_INVALID; + VG_(addToXA)(mce->tmpMap, &ent); + } + tl_assert(VG_(sizeXA)(mce->tmpMap) == tyenv->used); +} + +static void deinitMCEnv(MCEnv* mce) +{ + /* If this fails, there's been some serious snafu with tmp management, + that should be investigated. */ + tl_assert(VG_(sizeXA)(mce->tmpMap) == mce->tyenv->used); + VG_(deleteXA)(mce->tmpMap); +} + /*------------------------------------------------------------*/ /*--- IRAtoms -- a subset of IRExprs ---*/ @@ -332,7 +376,7 @@ static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 ) if (a1->tag == Iex_Const) return True; if (a1->tag == Iex_RdTmp) { - TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp ); + TempMapEnt* ent = VG_(indexXA)(mce->tmpMap, a1->Iex.RdTmp.tmp); return ent->kind == Orig; } return False; @@ -345,7 +389,7 @@ static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 ) if (a1->tag == Iex_Const) return True; if (a1->tag == Iex_RdTmp) { - TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp ); + TempMapEnt* ent = VG_(indexXA)(mce->tmpMap, a1->Iex.RdTmp.tmp); return ent->kind == VSh || ent->kind == BSh; } return False; @@ -418,12 +462,12 @@ static IRExpr* definedOfType ( IRType ty ) { /* add stmt to a bb */ static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) { - if (mce->trace) { + if (mce->settings->trace) { VG_(printf)(" %c: ", cat); - ppIRStmt(st); + ppIRStmt(st, mce->tyenv, 
0); VG_(printf)("\n"); } - addStmtToIRSB(mce->sb, st); + addStmtToIRStmtVec(mce->stmts, st); } /* assign value to tmp */ @@ -432,6 +476,16 @@ void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) { stmt(cat, mce, IRStmt_WrTmp(tmp,expr)); } +static void phi(HChar cat, MCEnv* mce, IRPhiVec* phi_nodes, IRPhi *phi) +{ + if (mce->settings->trace) { + VG_(printf)(" %c: ", cat); + ppIRPhi(phi); + VG_(printf)("\n"); + } + addIRPhiToIRPhiVec(phi_nodes, phi); +} + /* build various kinds of expressions */ #define triop(_op, _arg1, _arg2, _arg3) \ IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3)) @@ -457,7 +511,7 @@ static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e ) { TempKind k; IRTemp t; - IRType tyE = typeOfIRExpr(mce->sb->tyenv, e); + IRType tyE = typeOfIRExpr(mce->tyenv, e); tl_assert(tyE == ty); /* so 'ty' is redundant (!) */ switch (cat) { @@ -761,7 +815,7 @@ static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits ) /* Note, dst_ty is a shadow type, not an original type. */ tl_assert(isShadowAtom(mce,vbits)); - src_ty = typeOfIRExpr(mce->sb->tyenv, vbits); + src_ty = typeOfIRExpr(mce->tyenv, vbits); /* Fast-track some common cases */ if (src_ty == Ity_I32 && dst_ty == Ity_I32) @@ -1185,16 +1239,19 @@ static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */ behaviour of all 'emit-a-complaint' style functions we might call. 
*/ -static void setHelperAnns ( MCEnv* mce, IRDirty* di ) { +static void setHelperAnns ( MCEnv* mce, IRDirty* di ) +{ + const VexGuestLayout* layout = mce->settings->layout; + di->nFxState = 2; di->fxState[0].fx = Ifx_Read; - di->fxState[0].offset = mce->layout->offset_SP; - di->fxState[0].size = mce->layout->sizeof_SP; + di->fxState[0].offset = layout->offset_SP; + di->fxState[0].size = layout->sizeof_SP; di->fxState[0].nRepeats = 0; di->fxState[0].repeatLen = 0; di->fxState[1].fx = Ifx_Read; - di->fxState[1].offset = mce->layout->offset_IP; - di->fxState[1].size = mce->layout->sizeof_IP; + di->fxState[1].offset = layout->offset_IP; + di->fxState[1].size = layout->sizeof_IP; di->fxState[1].nRepeats = 0; di->fxState[1].repeatLen = 0; } @@ -1248,7 +1305,7 @@ static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard ) tl_assert(isShadowAtom(mce, vatom)); tl_assert(sameKindedAtoms(atom, vatom)); - ty = typeOfIRExpr(mce->sb->tyenv, vatom); + ty = typeOfIRExpr(mce->tyenv, vatom); /* sz is only used for constructing the error message */ sz = ty==Ity_I1 ? 0 : sizeofIRType(ty); @@ -1261,7 +1318,7 @@ static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard ) zero origin. 
*/ if (MC_(clo_mc_level) == 3) { origin = schemeE( mce, atom ); - if (mce->hWordTy == Ity_I64) { + if (mce->settings->hWordTy == Ity_I64) { origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) ); } } else { @@ -1408,15 +1465,16 @@ static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard ) */ static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size ) { + const VexGuestLayout* layout = mce->settings->layout; Int minoffD, maxoffD, i; Int minoff = offset; Int maxoff = minoff + size - 1; tl_assert((minoff & ~0xFFFF) == 0); tl_assert((maxoff & ~0xFFFF) == 0); - for (i = 0; i < mce->layout->n_alwaysDefd; i++) { - minoffD = mce->layout->alwaysDefd[i].offset; - maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1; + for (i = 0; i < layout->n_alwaysDefd; i++) { + minoffD = layout->alwaysDefd[i].offset; + maxoffD = minoffD + layout->alwaysDefd[i].size - 1; tl_assert((minoffD & ~0xFFFF) == 0); tl_assert((maxoffD & ~0xFFFF) == 0); @@ -1442,6 +1500,7 @@ static void do_shadow_PUT ( MCEnv* mce, Int offset, IRAtom* atom, IRAtom* vatom, IRExpr *guard ) { + const VexGuestLayout* layout = mce->settings->layout; IRType ty; // Don't do shadow PUTs if we're not doing undefined value checking. @@ -1459,7 +1518,7 @@ void do_shadow_PUT ( MCEnv* mce, Int offset, tl_assert(isShadowAtom(mce, vatom)); } - ty = typeOfIRExpr(mce->sb->tyenv, vatom); + ty = typeOfIRExpr(mce->tyenv, vatom); tl_assert(ty != Ity_I1); if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) { /* later: no ... 
*/ @@ -1474,10 +1533,10 @@ void do_shadow_PUT ( MCEnv* mce, Int offset, cond = assignNew('V', mce, Ity_I1, guard); iffalse = assignNew('V', mce, ty, - IRExpr_Get(offset + mce->layout->total_sizeB, ty)); + IRExpr_Get(offset + layout->total_sizeB, ty)); vatom = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse)); } - stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom )); + stmt( 'V', mce, IRStmt_Put( offset + layout->total_sizeB, vatom )); } } @@ -1519,7 +1578,7 @@ void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti) /* Do a cloned version of the Put that refers to the shadow area. */ IRRegArray* new_descr - = mkIRRegArray( descr->base + mce->layout->total_sizeB, + = mkIRRegArray( descr->base + mce->settings->layout->total_sizeB, tyS, descr->nElems); stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) )); } @@ -1542,7 +1601,7 @@ IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty ) /* return a cloned version of the Get that refers to the shadow area. */ /* FIXME: this isn't an atom! */ - return IRExpr_Get( offset + mce->layout->total_sizeB, tyS ); + return IRExpr_Get( offset + mce->settings->layout->total_sizeB, tyS ); } } @@ -1567,7 +1626,7 @@ IRExpr* shadow_GETI ( MCEnv* mce, /* return a cloned version of the Get that refers to the shadow area. 
*/ IRRegArray* new_descr - = mkIRRegArray( descr->base + mce->layout->total_sizeB, + = mkIRRegArray( descr->base + mce->settings->layout->total_sizeB, tyS, descr->nElems); return IRExpr_GetI( new_descr, ix, bias ); } @@ -1586,8 +1645,8 @@ static IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 ) { IRAtom* at; - IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); - IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); + IRType t1 = typeOfIRExpr(mce->tyenv, va1); + IRType t2 = typeOfIRExpr(mce->tyenv, va2); tl_assert(isShadowAtom(mce,va1)); tl_assert(isShadowAtom(mce,va2)); @@ -1643,9 +1702,9 @@ IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2, IRAtom* va3 ) { IRAtom* at; - IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); - IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); - IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3); + IRType t1 = typeOfIRExpr(mce->tyenv, va1); + IRType t2 = typeOfIRExpr(mce->tyenv, va2); + IRType t3 = typeOfIRExpr(mce->tyenv, va3); tl_assert(isShadowAtom(mce,va1)); tl_assert(isShadowAtom(mce,va2)); tl_assert(isShadowAtom(mce,va3)); @@ -1777,10 +1836,10 @@ IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 ) { IRAtom* at; - IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1); - IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2); - IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3); - IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4); + IRType t1 = typeOfIRExpr(mce->tyenv, va1); + IRType t2 = typeOfIRExpr(mce->tyenv, va2); + IRType t3 = typeOfIRExpr(mce->tyenv, va3); + IRType t4 = typeOfIRExpr(mce->tyenv, va4); tl_assert(isShadowAtom(mce,va1)); tl_assert(isShadowAtom(mce,va2)); tl_assert(isShadowAtom(mce,va3)); @@ -1879,7 +1938,7 @@ IRAtom* mkLazyN ( MCEnv* mce, tl_assert(isOriginalAtom(mce, exprvec[i])); if (cee->mcx_mask & (1<<i)) continue; - if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64) + if (typeOfIRExpr(mce->tyenv, exprvec[i]) != Ity_I64) mergeTy64 = False; } @@ -2971,6 
+3030,7 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, IROp op, IRAtom* atom1, IRAtom* atom2 ) { + const MCEnvSettings* settings = mce->settings; IRType and_or_ty; IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*); IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*); @@ -4038,13 +4098,13 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, return mkLazy2(mce, Ity_I64, vatom1, vatom2); case Iop_Add32: - if (mce->bogusLiterals || mce->useLLVMworkarounds) + if (settings->bogusLiterals || settings->useLLVMworkarounds) return expensiveAddSub(mce,True,Ity_I32, vatom1,vatom2, atom1,atom2); else goto cheap_AddSub32; case Iop_Sub32: - if (mce->bogusLiterals) + if (settings->bogusLiterals) return expensiveAddSub(mce,False,Ity_I32, vatom1,vatom2, atom1,atom2); else @@ -4061,13 +4121,13 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2); case Iop_Add64: - if (mce->bogusLiterals || mce->useLLVMworkarounds) + if (settings->bogusLiterals || settings->useLLVMworkarounds) return expensiveAddSub(mce,True,Ity_I64, vatom1,vatom2, atom1,atom2); else goto cheap_AddSub64; case Iop_Sub64: - if (mce->bogusLiterals) + if (settings->bogusLiterals) return expensiveAddSub(mce,False,Ity_I64, vatom1,vatom2, atom1,atom2); else @@ -4089,7 +4149,7 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_CmpEQ64: case Iop_CmpNE64: - if (mce->bogusLiterals) + if (settings->bogusLiterals) goto expensive_cmp64; else goto cheap_cmp64; @@ -4105,7 +4165,7 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_CmpEQ32: case Iop_CmpNE32: - if (mce->bogusLiterals) + if (settings->bogusLiterals) goto expensive_cmp32; else goto cheap_cmp32; @@ -4794,7 +4854,7 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, } else { IROp mkAdd; IRAtom* eBias; - IRType tyAddr = mce->hWordTy; + IRType tyAddr = mce->settings->hWordTy; tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; eBias = tyAddr==Ity_I32 ? 
mkU32(bias) : mkU64(bias); @@ -4967,7 +5027,7 @@ IRAtom* expr2vbits_ITE ( MCEnv* mce, vbitsC = expr2vbits(mce, cond); vbits1 = expr2vbits(mce, iftrue); vbits0 = expr2vbits(mce, iffalse); - ty = typeOfIRExpr(mce->sb->tyenv, vbits0); + ty = typeOfIRExpr(mce->tyenv, vbits0); return mkUifU(mce, ty, assignNew('V', mce, ty, @@ -4993,7 +5053,7 @@ IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e ) return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) ); case Iex_Const: - return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e))); + return definedOfType(shadowTypeV(typeOfIRExpr(mce->tyenv, e))); case Iex_Qop: return expr2vbits_Qop( @@ -5058,8 +5118,8 @@ IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom ) /* vatom is vbits-value and as such can only have a shadow type. */ tl_assert(isShadowAtom(mce,vatom)); - ty = typeOfIRExpr(mce->sb->tyenv, vatom); - tyH = mce->hWordTy; + ty = typeOfIRExpr(mce->tyenv, vatom); + tyH = mce->settings->hWordTy; if (tyH == Ity_I32) { switch (ty) { @@ -5119,7 +5179,7 @@ void do_shadow_Store ( MCEnv* mce, const HChar* hname = NULL; IRConst* c; - tyAddr = mce->hWordTy; + tyAddr = mce->settings->hWordTy; mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64; tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 ); tl_assert( end == Iend_LE || end == Iend_BE ); @@ -5138,10 +5198,10 @@ void do_shadow_Store ( MCEnv* mce, if (guard) { tl_assert(isOriginalAtom(mce, guard)); - tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); + tl_assert(typeOfIRExpr(mce->tyenv, guard) == Ity_I1); } - ty = typeOfIRExpr(mce->sb->tyenv, vdata); + ty = typeOfIRExpr(mce->tyenv, vdata); // If we're not doing undefined value checking, pretend that this value // is "all valid". 
That lets Vex's optimiser remove some of the V bit @@ -5469,9 +5529,9 @@ void do_shadow_Dirty ( MCEnv* mce, IRDirty* d ) tl_assert(d->mAddr); complainIfUndefined(mce, d->mAddr, d->guard); - tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr); + tyAddr = typeOfIRExpr(mce->tyenv, d->mAddr); tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64); - tl_assert(tyAddr == mce->hWordTy); /* not really right */ + tl_assert(tyAddr == mce->settings->hWordTy); /* not really right */ } /* Deal with memory inputs (reads or modifies) */ @@ -5520,7 +5580,7 @@ void do_shadow_Dirty ( MCEnv* mce, IRDirty* d ) /* Outputs: the destination temporary, if there is one. */ if (d->tmp != IRTemp_INVALID) { dst = findShadowTmpV(mce, d->tmp); - tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp); + tyDst = typeOfIRTemp(mce->tyenv, d->tmp); assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) ); } @@ -5852,7 +5912,7 @@ static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas ) tl_assert(cas->expdHi == NULL); tl_assert(cas->dataHi == NULL); - elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); + elemTy = typeOfIRExpr(mce->tyenv, cas->expdLo); switch (elemTy) { case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break; case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break; @@ -5946,7 +6006,7 @@ static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas ) tl_assert(cas->expdHi != NULL); tl_assert(cas->dataHi != NULL); - elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo); + elemTy = typeOfIRExpr(mce->tyenv, cas->expdLo); switch (elemTy) { case Ity_I8: opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8; @@ -6100,7 +6160,7 @@ static void do_shadow_LLSC ( MCEnv* mce, assignment of the loaded (shadow) data to the result temporary. Treat a store-conditional like a normal store, and mark the result temporary as defined. 
*/ - IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult); + IRType resTy = typeOfIRTemp(mce->tyenv, stResult); IRTemp resTmp = findShadowTmpV(mce, stResult); tl_assert(isIRAtom(stAddr)); @@ -6121,7 +6181,7 @@ static void do_shadow_LLSC ( MCEnv* mce, } else { /* Store Conditional */ /* Stay sane */ - IRType dataTy = typeOfIRExpr(mce->sb->tyenv, + IRType dataTy = typeOfIRExpr(mce->tyenv, stStoredata); tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32 || dataTy == Ity_I16 || dataTy == Ity_I8); @@ -6206,6 +6266,49 @@ static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg ) } +static void instrument_IRStmtVec(IRStmtVec* stmts_in, UInt stmts_in_first, + MCEnv* mce); + +static void do_shadow_IfThenElse(MCEnv* mce, IRExpr* cond, + IRIfThenElse_Hint hint, IRStmtVec* then_leg, + IRStmtVec* else_leg, IRPhiVec* phi_nodes_in) +{ + IRTemp (*findShadowTmp)(MCEnv* mce, IRTemp orig); + HChar category; + if (MC_(clo_mc_level) == 3) { + findShadowTmp = findShadowTmpB; + category = 'B'; + } else { + findShadowTmp = findShadowTmpV; + category = 'V'; + } + + complainIfUndefined(mce, cond, NULL); + + MCEnv then_mce; + initMCEnv(mce->tyenv, then_leg, &then_mce, mce); + instrument_IRStmtVec(then_leg, 0, &then_mce); + + MCEnv else_mce; + initMCEnv(mce->tyenv, else_leg, &else_mce, mce); + instrument_IRStmtVec(else_leg, 0, &else_mce); + + IRPhiVec* phi_nodes_out = emptyIRPhiVec(); + for (UInt i = 0; i < phi_nodes_in->phis_used; i++) { + IRPhi* phi_in = phi_nodes_in->phis[i]; + IRPhi* phi_shadow = mkIRPhi(findShadowTmp(mce, phi_in->dst), + findShadowTmp(&then_mce, phi_in->srcThen), + findShadowTmp(&else_mce, phi_in->srcElse)); + phi(category, mce, phi_nodes_out, phi_shadow); + phi('C', mce, phi_nodes_out, phi_in); + } + + stmt(category, mce, IRStmt_IfThenElse(cond, hint, then_mce.stmts, + else_mce.stmts, phi_nodes_out)); + deinitMCEnv(&then_mce); + deinitMCEnv(&else_mce); +} + /*------------------------------------------------------------*/ /*--- Memcheck main ---*/ 
/*------------------------------------------------------------*/ @@ -6249,7 +6352,9 @@ static Bool isBogusAtom ( IRAtom* at ) ); } -static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) +static Bool isBogusIRStmtVec(/*FLAT*/ IRStmtVec* stmts); + +static Bool isBogusIRStmt(/*FLAT*/ IRStmt* st) { Int i; IRExpr* e; @@ -6348,13 +6453,168 @@ static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) || (st->Ist.LLSC.storedata ? isBogusAtom(st->Ist.LLSC.storedata) : False); + case Ist_IfThenElse: + return isBogusAtom(st->Ist.IfThenElse.details->cond) + || isBogusIRStmtVec(st->Ist.IfThenElse.details->then_leg) + || isBogusIRStmtVec(st->Ist.IfThenElse.details->else_leg); default: unhandled: - ppIRStmt(st); - VG_(tool_panic)("hasBogusLiterals"); + ppIRStmt(st, NULL, 0); + VG_(tool_panic)("isBogusIRStmt"); } } +static Bool isBogusIRStmtVec(/*FLAT*/ IRStmtVec* stmts) +{ + for (UInt i = 0; i < stmts->stmts_used; i++) { + IRStmt* st = stmts->stmts[i]; + + Bool bogus = isBogusIRStmt(st); + if (0 && bogus) { + VG_(printf)("bogus: "); + ppIRStmt(st, NULL, 0); + VG_(printf)("\n"); + } + if (bogus) { + return True; + } + } + + return False; +} + + +/* Is to be called with already created and setup MCEnv as per initMCEnv(). */ +static void instrument_IRStmtVec(IRStmtVec* stmts_in, UInt stmts_in_first, + MCEnv* mce) +{ + for (UInt i = stmts_in_first; i < stmts_in->stmts_used; i++) { + IRStmt* st = stmts_in->stmts[i]; + UInt first_stmt = mce->stmts->stmts_used; + + if (mce->settings->trace) { + VG_(printf)("\n"); + ppIRStmt(st, mce->tyenv, mce->depth); + VG_(printf)("\n"); + } + + if (MC_(clo_mc_level) == 3) { + /* See comments on case Ist_CAS and Ist_IfThenElse below. */ + if (st->tag != Ist_CAS && st->tag != Ist_IfThenElse) + schemeS(mce, st); + } + + /* Generate instrumentation code for each stmt ... 
*/ + + switch (st->tag) { + + case Ist_WrTmp: + assign('V', mce, findShadowTmpV(mce, st->Ist.WrTmp.tmp), + expr2vbits(mce, st->Ist.WrTmp.data)); + break; + + case Ist_Put: + do_shadow_PUT( mce, + st->Ist.Put.offset, + st->Ist.Put.data, + NULL /* shadow atom */, NULL /* guard */ ); + break; + + case Ist_PutI: + do_shadow_PUTI(mce, st->Ist.PutI.details); + break; + + case Ist_Store: + do_shadow_Store(mce, st->Ist.Store.end, + st->Ist.Store.addr, 0/* addr bias */, + st->Ist.Store.data, + NULL /* shadow data */, + NULL/*guard*/); + break; + + case Ist_StoreG: + do_shadow_StoreG(mce, st->Ist.StoreG.details); + break; + + case Ist_LoadG: + do_shadow_LoadG(mce, st->Ist.LoadG.details); + break; + + case Ist_Exit: + complainIfUndefined(mce, st->Ist.Exit.guard, NULL); + break; + + case Ist_IMark: + break; + + case Ist_NoOp: + case Ist_MBE: + break; + + case Ist_Dirty: + do_shadow_Dirty(mce, st->Ist.Dirty.details); + break; + + case Ist_AbiHint: + do_AbiHint(mce, st->Ist.AbiHint.base, + st->Ist.AbiHint.len, + st->Ist.AbiHint.nia); + break; + + case Ist_CAS: + do_shadow_CAS(mce, st->Ist.CAS.details); + /* Note, do_shadow_CAS copies the CAS itself to the output + stmts, because it needs to add instrumentation both + before and after it. Hence skip the copy below. Also + skip the origin-tracking stuff (call to schemeS) above, + since that's all tangled up with it too; do_shadow_CAS + does it all. */ + break; + + case Ist_LLSC: + do_shadow_LLSC( mce, + st->Ist.LLSC.end, + st->Ist.LLSC.result, + st->Ist.LLSC.addr, + st->Ist.LLSC.storedata ); + break; + + case Ist_IfThenElse: + do_shadow_IfThenElse(mce, st->Ist.IfThenElse.details->cond, + st->Ist.IfThenElse.details->hint, + st->Ist.IfThenElse.details->then_leg, + st->Ist.IfThenElse.details->else_leg, + st->Ist.IfThenElse.details->phi_nodes); + /* Note, do_shadow_IfThenElse copies the IfThenElse itself to the + output stmts, because it needs to add instrumentation to the legs + and to phi nodes. Hence skip the copy below. 
Also skip the + origin-tracking stuff (call to schemeS) above, since that's all + tangled up with it too; do_shadow_IfThenElse does it all. */ + break; + + default: + VG_(printf)("\n"); + ppIRStmt(st, mce->tyenv, 0); + VG_(printf)("\n"); + VG_(tool_panic)("memcheck: unhandled IRStmt"); + + } /* switch (st->tag) */ + + if (0 && mce->settings->trace) { + for (UInt j = first_stmt; j < mce->stmts->stmts_used; j++) { + ppIRStmt(mce->stmts->stmts[j], mce->tyenv, mce->depth + 1); + VG_(printf)("\n"); + } + VG_(printf)("\n"); + } + + /* ... and finally copy the stmt itself to the output. Except, + skip the copy of IRCASs and IfThenElse; see comments on cases Ist_CAS + and Ist_IfThenElse above. */ + if (st->tag != Ist_CAS && st->tag != Ist_IfThenElse) + stmt('C', mce, st); + } +} IRSB* MC_(instrument) ( VgCallbackClosure* closure, IRSB* sb_in, @@ -6363,11 +6623,7 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure, const VexArchInfo* archinfo_host, IRType gWordTy, IRType hWordTy ) { - Bool verboze = 0||False; - Int i, j, first_stmt; - IRStmt* st; - MCEnv mce; - IRSB* sb_out; + Bool verboze = 0 || False; if (gWordTy != hWordTy) { /* We don't currently support this case. */ @@ -6385,19 +6641,14 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure, tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3); - /* Set up SB */ - sb_out = deepCopyIRSBExceptStmts(sb_in); + /* Set up SB, MCEnv and MCEnvSettings. */ + IRSB* sb_out = deepCopyIRSBExceptStmts(sb_in); - /* Set up the running environment. Both .sb and .tmpMap are - modified as we go along. Note that tmps are added to both - .sb->tyenv and .tmpMap together, so the valid index-set for - those two arrays should always be identical. 
*/ - VG_(memset)(&mce, 0, sizeof(mce)); - mce.sb = sb_out; - mce.trace = verboze; - mce.layout = layout; - mce.hWordTy = hWordTy; - mce.bogusLiterals = False; + MCEnvSettings settings; + settings.trace = verboze; + settings.layout = layout; + settings.hWordTy = hWordTy; + settings.bogusLiterals = False; /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on Darwin. 10.7 is mostly built with LLVM, which uses these for @@ -6408,66 +6659,41 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure, interpretation, but that would require some way to tag them in the _toIR.c front ends, which is a lot of faffing around. So for now just use the slow and blunt-instrument solution. */ - mce.useLLVMworkarounds = False; + settings.useLLVMworkarounds = False; # if defined(VGO_darwin) - mce.useLLVMworkarounds = True; + settings.useLLVMworkarounds = True; # endif - mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free), - sizeof(TempMapEnt)); - VG_(hintSizeXA) (mce.tmpMap, sb_in->tyenv->types_used); - for (i = 0; i < sb_in->tyenv->types_used; i++) { - TempMapEnt ent; - ent.kind = Orig; - ent.shadowV = IRTemp_INVALID; - ent.shadowB = IRTemp_INVALID; - VG_(addToXA)( mce.tmpMap, &ent ); - } - tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used ); + MCEnv mce; + initMCEnv(sb_out->tyenv, sb_in->stmts, &mce, NULL); + mce.settings = &settings; + sb_out->stmts = mce.stmts; + + tl_assert(isFlatIRSB(sb_in)); if (MC_(clo_expensive_definedness_checks)) { - /* For expensive definedness checking skip looking for bogus - literals. */ - mce.bogusLiterals = True; + /* For expensive definedness checking skip looking for bogus literals. */ + settings.bogusLiterals = True; } else { /* Make a preliminary inspection of the statements, to see if there are any dodgy-looking literals. If there are, we generate - extra-detailed (hence extra-expensive) instrumentation in - places. 
Scan the whole bb even if dodgyness is found earlier, - so that the flatness assertion is applied to all stmts. */ - Bool bogus = False; - - for (i = 0; i < sb_in->stmts_used; i++) { - st = sb_in->stmts[i]; - tl_assert(st); - tl_assert(isFlatIRStmt(st)); - - if (!bogus) { - bogus = checkForBogusLiterals(st); - if (0 && bogus) { - VG_(printf)("bogus: "); - ppIRStmt(st); - VG_(printf)("\n"); - } - if (bogus) break; - } - } - mce.bogusLiterals = bogus; + extra-detailed (hence extra-expensive) instrumentation in places. */ + settings.bogusLiterals = isBogusIRStmtVec(sb_in->stmts); } /* Copy verbatim any IR preamble preceding the first IMark */ - tl_assert(mce.sb == sb_out); - tl_assert(mce.sb != sb_in); + tl_assert(mce.stmts == sb_out->stmts); + tl_assert(mce.stmts != sb_in->stmts); - i = 0; - while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) { + UInt i = 0; + while (i < sb_in->stmts->stmts_used + && sb_in->stmts->stmts[i]->tag != Ist_IMark) { - st = sb_in->stmts[i]; - tl_assert(st); - tl_assert(isFlatIRStmt(st)); + IRStmt* st = sb_in->stmts->stmts[i]; + tl_assert(st != NULL); - stmt( 'C', &mce, sb_in->stmts[i] ); + stmt('C', &mce, sb_in->stmts->stmts[i]); i++; } @@ -6493,18 +6719,18 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure, assignment for the corresponding origin (B) shadow, claiming no-origin, as appropriate for a defined value. */ - for (j = 0; j < i; j++) { - if (sb_in->stmts[j]->tag == Ist_WrTmp) { + for (UInt j = 0; j < i; j++) { + if (sb_in->stmts->stmts[j]->tag == Ist_WrTmp) { /* findShadowTmpV checks its arg is an original tmp; no need to assert that here. 
*/ - IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp; + IRTemp tmp_o = sb_in->stmts->stmts[j]->Ist.WrTmp.tmp; IRTemp tmp_v = findShadowTmpV(&mce, tmp_o); - IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v); - assign( 'V', &mce, tmp_v, definedOfType( ty_v ) ); + IRType ty_v = typeOfIRTemp(mce.tyenv, tmp_v); + assign('V', &mce, tmp_v, definedOfType(ty_v)); if (MC_(clo_mc_level) == 3) { IRTemp tmp_b = findShadowTmpB(&mce, tmp_o); - tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32); - assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */); + tl_assert(typeOfIRTemp(mce.tyenv, tmp_b) == Ity_I32); + assign('B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */); } if (0) { VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j); @@ -6516,129 +6742,13 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure, /* Iterate over the remaining stmts to generate instrumentation. */ - tl_assert(sb_in->stmts_used > 0); - tl_assert(i >= 0); - tl_assert(i < sb_in->stmts_used); - tl_assert(sb_in->stmts[i]->tag == Ist_IMark); - - for (/* use current i*/; i < sb_in->stmts_used; i++) { - - st = sb_in->stmts[i]; - first_stmt = sb_out->stmts_used; - - if (verboze) { - VG_(printf)("\n"); - ppIRStmt(st); - VG_(printf)("\n"); - } - - if (MC_(clo_mc_level) == 3) { - /* See comments on case Ist_CAS below. */ - if (st->tag != Ist_CAS) - schemeS( &mce, st ); - } - - /* Generate instrumentation code for each stmt ... 
*/ - - switch (st->tag) { - - case Ist_WrTmp: - assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp), - expr2vbits( &mce, st->Ist.WrTmp.data) ); - break; - - case Ist_Put: - do_shadow_PUT( &mce, - st->Ist.Put.offset, - st->Ist.Put.data, - NULL /* shadow atom */, NULL /* guard */ ); - break; - - case Ist_PutI: - do_shadow_PUTI( &mce, st->Ist.PutI.details); - break; - - case Ist_Store: - do_shadow_Store( &mce, st->Ist.Store.end, - st->Ist.Store.addr, 0/* addr bias */, - st->Ist.Store.data, - NULL /* shadow data */, - NULL/*guard*/ ); - break; - - case Ist_StoreG: - do_shadow_StoreG( &mce, st->Ist.StoreG.details ); - break; - - case Ist_LoadG: - do_shadow_LoadG( &mce, st->Ist.LoadG.details ); - break; - - case Ist_Exit: - complainIfUndefined( &mce, st->Ist.Exit.guard, NULL ); - break; - - case Ist_IMark: - break; - - case Ist_NoOp: - case Ist_MBE: - break; - - case Ist_Dirty: - do_shadow_Dirty( &mce, st->Ist.Dirty.details ); - break; - - case Ist_AbiHint: - do_AbiHint( &mce, st->Ist.AbiHint.base, - st->Ist.AbiHint.len, - st->Ist.AbiHint.nia ); - break; - - case Ist_CAS: - do_shadow_CAS( &mce, st->Ist.CAS.details ); - /* Note, do_shadow_CAS copies the CAS itself to the output - block, because it needs to add instrumentation both - before and after it. Hence skip the copy below. Also - skip the origin-tracking stuff (call to schemeS) above, - since that's all tangled up with it too; do_shadow_CAS - does it all. */ - break; - - case Ist_LLSC: - do_shadow_LLSC( &mce, - st->Ist.LLSC.end, - st->Ist.LLSC.result, - st->Ist.LLSC.addr, - st->Ist.LLSC.storedata ); - break; - - default: - VG_(printf)("\n"); - ppIRStmt(st); - VG_(printf)("\n"); - VG_(tool_panic)("memcheck: unhandled IRStmt"); - - } /* switch (st->tag) */ - - if (0 && verboze) { - for (j = first_stmt; j < sb_out->stmts_used; j++) { - VG_(printf)(" "); - ppIRStmt(sb_out->stmts[j]); - VG_(printf)("\n"); - } - VG_(printf)("\n"); - } - - /* ... and finally copy the stmt itself to the output. 
Except, - skip the copy of IRCASs; see comments on case Ist_CAS - above. */ - if (st->tag != Ist_CAS) - stmt('C', &mce, st); - } + tl_assert(sb_in->stmts->stmts_used > 0); + tl_assert(i < sb_in->stmts->stmts_used); + tl_assert(sb_in->stmts->stmts[i]->tag == Ist_IMark); + instrument_IRStmtVec(sb_in->stmts, i, &mce); /* Now we need to complain if the jump target is undefined. */ - first_stmt = sb_out->stmts_used; + UInt first_stmt = sb_out->stmts->stmts_used; if (verboze) { VG_(printf)("sb_in->next = "); @@ -6646,23 +6756,19 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure, VG_(printf)("\n\n"); } - complainIfUndefined( &mce, sb_in->next, NULL ); + complainIfUndefined(&mce, sb_in->next, NULL); if (0 && verboze) { - for (j = first_stmt; j < sb_out->stmts_used; j++) { - VG_(printf)(" "); - ppIRStmt(sb_out->stmts[j]); + for (UInt j = first_stmt; j < sb_out->stmts->stmts_used; j++) { + ppIRStmt(sb_out->stmts->stmts[j], sb_out->tyenv, 1); VG_(printf)("\n"); } VG_(printf)("\n"); } - /* If this fails, there's been some serious snafu with tmp management, - that should be investigated. */ - tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used ); - VG_(deleteXA)( mce.tmpMap ); + tl_assert(mce.stmts == sb_out->stmts); + deinitMCEnv(&mce); - tl_assert(mce.sb == sb_out); return sb_out; } @@ -6873,16 +6979,9 @@ static Bool is_helperc_value_checkN_fail ( const HChar* name ) || 0==VG_(strcmp)(name, "1_fail_w_o)"); } -IRSB* MC_(final_tidy) ( IRSB* sb_in ) +static IRStmtVec* final_tidy_IRStmtVec(IRStmtVec* stmts) { - Int i; - IRStmt* st; - IRDirty* di; - IRExpr* guard; - IRCallee* cee; - Bool alreadyPresent; - Pairs pairs; - + Pairs pairs; pairs.pairsUsed = 0; pairs.pairs[N_TIDYING_PAIRS].entry = (void*)0x123; @@ -6892,25 +6991,33 @@ IRSB* MC_(final_tidy) ( IRSB* sb_in ) of the relevant helpers is seen, check if we have made a previous call to the same helper using the same guard expression, and if so, delete the call. 
*/ - for (i = 0; i < sb_in->stmts_used; i++) { - st = sb_in->stmts[i]; + for (UInt i = 0; i < stmts->stmts_used; i++) { + IRStmt* st = stmts->stmts[i]; tl_assert(st); + + if (st->tag == Ist_IfThenElse) { + final_tidy_IRStmtVec(st->Ist.IfThenElse.details->then_leg); + final_tidy_IRStmtVec(st->Ist.IfThenElse.details->else_leg); + } + if (st->tag != Ist_Dirty) continue; - di = st->Ist.Dirty.details; - guard = di->guard; + + IRDirty* di = st->Ist.Dirty.details; + IRExpr* guard = di->guard; tl_assert(guard); if (0) { ppIRExpr(guard); VG_(printf)("\n"); } - cee = di->cee; + IRCallee* cee = di->cee; if (!is_helperc_value_checkN_fail( cee->name )) continue; + /* Ok, we have a call to helperc_value_check0/1/4/8_fail with guard 'guard'. Check if we have already seen a call to this function with the same guard. If so, delete it. If not, add it to the set of calls we do know about. */ - alreadyPresent = check_or_add( &pairs, guard, cee->addr ); + Bool alreadyPresent = check_or_add( &pairs, guard, cee->addr ); if (alreadyPresent) { - sb_in->stmts[i] = IRStmt_NoOp(); + stmts->stmts[i] = IRStmt_NoOp(); if (0) VG_(printf)("XX\n"); } } @@ -6918,6 +7025,12 @@ IRSB* MC_(final_tidy) ( IRSB* sb_in ) tl_assert(pairs.pairs[N_TIDYING_PAIRS].entry == (void*)0x123); tl_assert(pairs.pairs[N_TIDYING_PAIRS].guard == (IRExpr*)0x456); + return stmts; +} + +IRSB* MC_(final_tidy) ( IRSB* sb_in ) +{ + final_tidy_IRStmtVec(sb_in->stmts); return sb_in; } @@ -6931,17 +7044,14 @@ IRSB* MC_(final_tidy) ( IRSB* sb_in ) /* Almost identical to findShadowTmpV. */ static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig ) { - TempMapEnt* ent; - /* VG_(indexXA) range-checks 'orig', hence no need to check - here. */ - ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); + /* VG_(indexXA) range-checks 'orig', hence no need to check here. 
*/ + TempMapEnt* ent = (TempMapEnt*) VG_(indexXA)(mce->tmpMap, orig); tl_assert(ent->kind == Orig); if (ent->shadowB == IRTemp_INVALID) { - IRTemp tmpB - = newTemp( mce, Ity_I32, BSh ); + IRTemp tmpB = newTemp( mce, Ity_I32, BSh ); /* newTemp may cause mce->tmpMap to resize, hence previous results from VG_(indexXA) are invalid. */ - ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig ); + ent = (TempMapEnt*) VG_(indexXA)(mce->tmpMap, orig); tl_assert(ent->kind == Orig); tl_assert(ent->shadowB == IRTemp_INVALID); ent->shadowB = tmpB; @@ -6971,7 +7081,7 @@ static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, const HChar* hName; IRTemp bTmp; IRDirty* di; - IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); + IRType aTy = typeOfIRExpr(mce->tyenv, baseaddr); IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64; IRAtom* ea = baseaddr; if (offset != 0) { @@ -6979,7 +7089,7 @@ static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, : mkU64( (Long)(Int)offset ); ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); } - bTmp = newTemp(mce, mce->hWordTy, BSh); + bTmp = newTemp(mce, mce->settings->hWordTy, BSh); switch (szB) { case 1: hFun = (void*)&MC_(helperc_b_load1); @@ -7020,7 +7130,7 @@ static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, /* no need to mess with any annotations. This call accesses neither guest state nor guest memory. */ stmt( 'B', mce, IRStmt_Dirty(di) ); - if (mce->hWordTy == Ity_I64) { + if (mce->settings->hWordTy == Ity_I64) { /* 64-bit host */ IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh); assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) ); @@ -7086,19 +7196,19 @@ static void gen_store_b ( MCEnv* mce, Int szB, void* hFun; const HChar* hName; IRDirty* di; - IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr ); + IRType aTy = typeOfIRExpr(mce->tyenv, baseaddr); IROp opAdd = aTy == Ity_I32 ? 
Iop_Add32 : Iop_Add64; IRAtom* ea = baseaddr; if (guard) { tl_assert(isOriginalAtom(mce, guard)); - tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1); + tl_assert(typeOfIRExpr(mce->tyenv, guard) == Ity_I1); } if (offset != 0) { IRAtom* off = aTy == Ity_I32 ? mkU32( offset ) : mkU64( (Long)(Int)offset ); ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off)); } - if (mce->hWordTy == Ity_I64) + if (mce->settings->hWordTy == Ity_I64) dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB)); switch (szB) { @@ -7134,7 +7244,7 @@ static void gen_store_b ( MCEnv* mce, Int szB, } static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) { - IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); + IRType eTy = typeOfIRExpr(mce->tyenv, e); if (eTy == Ity_I64) return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) ); if (eTy == Ity_I32) @@ -7143,7 +7253,7 @@ static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) { } static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) { - IRType eTy = typeOfIRExpr(mce->sb->tyenv, e); + IRType eTy = typeOfIRExpr(mce->tyenv, e); tl_assert(eTy == Ity_I32); if (dstTy == Ity_I64) return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) ); @@ -7155,6 +7265,8 @@ static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ) { tl_assert(MC_(clo_mc_level) == 3); + const VexGuestLayout* layout = mce->settings->layout; + switch (e->tag) { case Iex_GetI: { @@ -7169,8 +7281,8 @@ static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ) return mkU32(0); tl_assert(sizeofIRType(equivIntTy) >= 4); tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy)); - descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB, - equivIntTy, descr->nElems ); + descr_b = mkIRRegArray(descr->base + 2 * layout->total_sizeB, + equivIntTy, descr->nElems); /* Do a shadow indexed get of the same size, giving t1. Take the bottom 32 bits of it, giving t2. 
Compute into t3 the origin for the index (almost certainly zero, but there's @@ -7215,7 +7327,8 @@ static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ) /* assert that the B value for the address is already available (somewhere) */ tl_assert(isIRAtom(e->Iex.Load.addr)); - tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64); + tl_assert(mce->settings->hWordTy == Ity_I32 + || mce->settings->hWordTy == Ity_I64); return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 ); } case Iex_ITE: { @@ -7270,12 +7383,10 @@ static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ) e->Iex.Get.offset, sizeofIRType(e->Iex.Get.ty) ); - tl_assert(b_offset >= -1 - && b_offset <= mce->layout->total_sizeB -4); + tl_assert(b_offset >= -1 && b_offset <= layout->total_sizeB -4); if (b_offset >= 0) { /* FIXME: this isn't an atom! */ - return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB, - Ity_I32 ); + return IRExpr_Get(b_offset + 2 * layout->total_sizeB, Ity_I32); } return mkU32(0); } @@ -7289,6 +7400,8 @@ static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ) static void do_origins_Dirty ( MCEnv* mce, IRDirty* d ) { + const VexGuestLayout* layout = mce->settings->layout; + // This is a hacked version of do_shadow_Dirty Int i, k, n, toDo, gSz, gOff; IRAtom *here, *curr; @@ -7351,7 +7464,7 @@ static void do_origins_Dirty ( MCEnv* mce, IRDirty* d ) iffalse = mkU32(0); iftrue = assignNew( 'B', mce, Ity_I32, IRExpr_Get(b_offset - + 2*mce->layout->total_sizeB, + + 2 * layout->total_sizeB, Ity_I32)); here = assignNew( 'B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse)); @@ -7450,13 +7563,13 @@ static void do_origins_Dirty ( MCEnv* mce, IRDirty* d ) d->guard); iffalse = assignNew('B', mce, Ity_I32, IRExpr_Get(b_offset + - 2*mce->layout->total_sizeB, + 2 * layout->total_sizeB, Ity_I32)); curr = assignNew('V', mce, Ity_I32, IRExpr_ITE(cond, curr, iffalse)); stmt( 'B', mce, IRStmt_Put(b_offset - + 2*mce->layout->total_sizeB, + + 2 * layout->total_sizeB, curr )); } gSz -= n; @@ -7506,7 +7619,7 @@ static void 
do_origins_Store_guarded ( MCEnv* mce, XXXX how does this actually ensure that?? */ tl_assert(isIRAtom(stAddr)); tl_assert(isIRAtom(stData)); - dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) ); + dszB = sizeofIRType( typeOfIRExpr(mce->tyenv, stData ) ); dataB = schemeE( mce, stData ); gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard ); } @@ -7559,6 +7672,8 @@ static void schemeS ( MCEnv* mce, IRStmt* st ) { tl_assert(MC_(clo_mc_level) == 3); + const VexGuestLayout* layout = mce->settings->layout; + switch (st->tag) { case Ist_AbiHint: @@ -7583,7 +7698,7 @@ static void schemeS ( MCEnv* mce, IRStmt* st ) tl_assert(sizeofIRType(equivIntTy) >= 4); tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy)); descr_b - = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB, + = mkIRRegArray( descr->base + 2 * layout->total_sizeB, equivIntTy, descr->nElems ); /* Compute a value to Put - the conjoinment of the origin for the data to be Put-ted (obviously) and of the index value @@ -7623,7 +7738,7 @@ static void schemeS ( MCEnv* mce, IRStmt* st ) if (st->Ist.LLSC.storedata == NULL) { /* Load Linked */ IRType resTy - = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result); + = typeOfIRTemp(mce->tyenv, st->Ist.LLSC.result); IRExpr* vanillaLoad = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr); tl_assert(resTy == Ity_I64 || resTy == Ity_I32 @@ -7649,11 +7764,11 @@ static void schemeS ( MCEnv* mce, IRStmt* st ) Int b_offset = MC_(get_otrack_shadow_offset)( st->Ist.Put.offset, - sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data)) + sizeofIRType(typeOfIRExpr(mce->tyenv, st->Ist.Put.data)) ); if (b_offset >= 0) { /* FIXME: this isn't an atom! 
*/ - stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB, + stmt( 'B', mce, IRStmt_Put(b_offset + 2 * layout->total_sizeB, schemeE( mce, st->Ist.Put.data )) ); } break; @@ -7672,7 +7787,7 @@ static void schemeS ( MCEnv* mce, IRStmt* st ) default: VG_(printf)("mc_translate.c: schemeS: unhandled: "); - ppIRStmt(st); + ppIRStmt(st, mce->tyenv, 0); VG_(tool_panic)("memcheck:schemeS"); } } |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:27:43
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=1d66c7c464bc5e4e1b333bca99f10143712bafc5 commit 1d66c7c464bc5e4e1b333bca99f10143712bafc5 Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 12:56:35 2017 +0200 Support If-Then-Else in Valgrind's gdbserver. Diff: --- coregrind/m_gdbserver/m_gdbserver.c | 108 ++++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 43 deletions(-) diff --git a/coregrind/m_gdbserver/m_gdbserver.c b/coregrind/m_gdbserver/m_gdbserver.c index 648d543..566b449 100644 --- a/coregrind/m_gdbserver/m_gdbserver.c +++ b/coregrind/m_gdbserver/m_gdbserver.c @@ -1148,11 +1148,10 @@ void VG_(helperc_invalidate_if_not_gdbserved) ( Addr addr ) } static void VG_(add_stmt_call_invalidate_if_not_gdbserved) - ( IRSB* sb_in, - const VexGuestLayout* layout, - const VexGuestExtents* vge, - IRTemp jmp, - IRSB* irsb) + (const VexGuestLayout* layout, + const VexGuestExtents* vge, + IRTemp jmp, + IRSB* irsb) { void* fn; @@ -1171,7 +1170,7 @@ static void VG_(add_stmt_call_invalidate_if_not_gdbserved) di->nFxState = 0; - addStmtToIRSB(irsb, IRStmt_Dirty(di)); + addStmtToIRStmtVec(irsb->stmts, IRStmt_Dirty(di)); } /* software_breakpoint support --------------------------------------*/ @@ -1186,13 +1185,12 @@ static void VG_(add_stmt_call_invalidate_if_not_gdbserved) of other breaks in the same sb_in while the process is stopped), a debugger statement will be inserted for all instructions of a block. 
*/ static void VG_(add_stmt_call_gdbserver) - (IRSB* sb_in, /* block being translated */ - const VexGuestLayout* layout, + (const VexGuestLayout* layout, const VexGuestExtents* vge, IRType gWordTy, IRType hWordTy, - Addr iaddr, /* Addr of instruction being instrumented */ - UChar delta, /* delta to add to iaddr to obtain IP */ - IRSB* irsb) /* irsb block to which call is added */ + Addr iaddr, /* Addr of instruction being instrumented */ + UChar delta, /* delta to add to iaddr to obtain IP */ + IRStmtVec* stmts) /* list of statements to which call is added */ { void* fn; const HChar* nm; @@ -1217,8 +1215,8 @@ static void VG_(add_stmt_call_gdbserver) IP when executing thumb code. gdb uses this thumb bit a.o. to properly guess the next IP for the 'step' and 'stepi' commands. */ vg_assert(delta <= 1); - addStmtToIRSB(irsb, IRStmt_Put(layout->offset_IP , - mkIRExpr_HWord(iaddr + (Addr)delta))); + addStmtToIRStmtVec(stmts, IRStmt_Put(layout->offset_IP , + mkIRExpr_HWord(iaddr + (Addr)delta))); fn = &VG_(helperc_CallDebugger); nm = "VG_(helperc_CallDebugger)"; @@ -1249,8 +1247,7 @@ static void VG_(add_stmt_call_gdbserver) di->fxState[1].nRepeats = 0; di->fxState[1].repeatLen = 0; - addStmtToIRSB(irsb, IRStmt_Dirty(di)); - + addStmtToIRStmtVec(stmts, IRStmt_Dirty(di)); } @@ -1273,31 +1270,27 @@ static void VG_(add_stmt_call_invalidate_exit_target_if_not_gdbserved) : sb_in->next->Iex.Const.con->Ico.U32); } else if (sb_in->next->tag == Iex_RdTmp) { VG_(add_stmt_call_invalidate_if_not_gdbserved) - (sb_in, layout, vge, sb_in->next->Iex.RdTmp.tmp, irsb); + (layout, vge, sb_in->next->Iex.RdTmp.tmp, irsb); } else { vg_assert (0); /* unexpected expression tag in exit. 
*/ } } -IRSB* VG_(instrument_for_gdbserver_if_needed) - (IRSB* sb_in, - const VexGuestLayout* layout, - const VexGuestExtents* vge, - IRType gWordTy, IRType hWordTy) +static IRStmtVec* instrument_for_gdbserver_IRStmtVec + (IRStmtVec* stmts_in, + IRStmtVec* parent, + const VgVgdb instr_needed, + const VexGuestLayout* layout, + const VexGuestExtents* vge, + IRType gWordTy, IRType hWordTy) { - IRSB* sb_out; - Int i; - const VgVgdb instr_needed = VG_(gdbserver_instrumentation_needed) (vge); - - if (instr_needed == Vg_VgdbNo) - return sb_in; + IRStmtVec* stmts_out = emptyIRStmtVec(); + stmts_out->parent = parent; + stmts_out->id = stmts_in->id; + stmts_out->defset = deepCopyIRTempDefSet(stmts_in->defset); - - /* here, we need to instrument for gdbserver */ - sb_out = deepCopyIRSBExceptStmts(sb_in); - - for (i = 0; i < sb_in->stmts_used; i++) { - IRStmt* st = sb_in->stmts[i]; + for (UInt i = 0; i < stmts_in->stmts_used; i++) { + IRStmt* st = stmts_in->stmts[i]; if (!st || st->tag == Ist_NoOp) continue; @@ -1307,18 +1300,30 @@ IRSB* VG_(instrument_for_gdbserver_if_needed) st->Ist.Exit.dst->Ico.U64 : st->Ist.Exit.dst->Ico.U32); } - addStmtToIRSB( sb_out, st ); + + if (st->tag == Ist_IfThenElse) { + st = IRStmt_IfThenElse( + st->Ist.IfThenElse.details->cond, + st->Ist.IfThenElse.details->hint, + instrument_for_gdbserver_IRStmtVec( + st->Ist.IfThenElse.details->then_leg, stmts_out, + instr_needed, layout, vge, gWordTy, hWordTy), + instrument_for_gdbserver_IRStmtVec( + st->Ist.IfThenElse.details->else_leg, stmts_out, + instr_needed, layout, vge, gWordTy, hWordTy), + st->Ist.IfThenElse.details->phi_nodes); + } + addStmtToIRStmtVec(stmts_out, st); + if (st->tag == Ist_IMark) { /* For an Ist_Mark, add a call to debugger. 
*/ switch (instr_needed) { case Vg_VgdbNo: vg_assert (0); case Vg_VgdbYes: case Vg_VgdbFull: - VG_(add_stmt_call_gdbserver) ( sb_in, layout, vge, - gWordTy, hWordTy, - st->Ist.IMark.addr, - st->Ist.IMark.delta, - sb_out); + VG_(add_stmt_call_gdbserver)(layout, vge, gWordTy, hWordTy, + st->Ist.IMark.addr, st->Ist.IMark.delta, + stmts_out); /* There is an optimisation possible here for Vg_VgdbFull: Put a guard ensuring we only call gdbserver if 'FullCallNeeded'. FullCallNeeded would be set to 1 we have just switched on @@ -1333,11 +1338,28 @@ IRSB* VG_(instrument_for_gdbserver_if_needed) } } + return stmts_out; +} + +IRSB* VG_(instrument_for_gdbserver_if_needed) + (IRSB* sb_in, + const VexGuestLayout* layout, + const VexGuestExtents* vge, + IRType gWordTy, IRType hWordTy) +{ + const VgVgdb instr_needed = VG_(gdbserver_instrumentation_needed) (vge); + + if (instr_needed == Vg_VgdbNo) + return sb_in; + + /* Here, we need to instrument for gdbserver. */ + IRSB* sb_out = deepCopyIRSBExceptStmts(sb_in); + sb_out->stmts = instrument_for_gdbserver_IRStmtVec(sb_in->stmts, NULL, + instr_needed, layout, vge, gWordTy, hWordTy); + if (instr_needed == Vg_VgdbYes) { - VG_(add_stmt_call_invalidate_exit_target_if_not_gdbserved) (sb_in, - layout, vge, - gWordTy, - sb_out); + VG_(add_stmt_call_invalidate_exit_target_if_not_gdbserved)(sb_in, layout, + vge, gWordTy, sb_out); } return sb_out; |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:27:37
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=00efe958cd9c7e6ecd13551e488cbbec5b9558f3 commit 00efe958cd9c7e6ecd13551e488cbbec5b9558f3 Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 11:47:18 2017 +0200 Support If-Then-Else in the main JITter. Diff: --- coregrind/m_translate.c | 194 ++++++++++++++++++++++++++++-------------------- 1 file changed, 115 insertions(+), 79 deletions(-) diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c index 55c845d..7385713 100644 --- a/coregrind/m_translate.c +++ b/coregrind/m_translate.c @@ -268,39 +268,38 @@ IRSB* tool_instrument_then_gdbserver_if_needed ( VgCallbackClosure* closureV, only parts of SP. Bizarre, but it has been known to happen. */ static -IRSB* vg_SP_update_pass ( void* closureV, - IRSB* sb_in, - const VexGuestLayout* layout, - const VexGuestExtents* vge, - const VexArchInfo* vai, - IRType gWordTy, - IRType hWordTy ) +IRStmtVec* vg_SP_update_IRStmtVec(void* closureV, + IRTypeEnv* tyenv, + IRStmtVec* stmts_in, + IRStmtVec* parent, + const VexGuestLayout* layout, + const VexGuestExtents* vge, + const VexArchInfo* vai, + IRType gWordTy, + IRType hWordTy) { - Int i, j, k, minoff_ST, maxoff_ST, sizeof_SP, offset_SP; + Int j, k, minoff_ST, maxoff_ST; Int first_SP, last_SP, first_Put, last_Put; IRDirty *dcall, *d; - IRStmt* st; IRExpr* e; IRRegArray* descr; - IRType typeof_SP; Long delta, con; /* Set up stuff for tracking the guest IP */ Bool curr_IP_known = False; Addr curr_IP = 0; - /* Set up BB */ - IRSB* bb = emptyIRSB(); - bb->tyenv = deepCopyIRTypeEnv(sb_in->tyenv); - bb->next = deepCopyIRExpr(sb_in->next); - bb->jumpkind = sb_in->jumpkind; - bb->offsIP = sb_in->offsIP; + /* Set up new IRStmtVec */ + IRStmtVec* out = emptyIRStmtVec(); + out->parent = parent; + out->id = stmts_in->id; + out->defset = deepCopyIRTempDefSet(stmts_in->defset); delta = 0; - sizeof_SP = layout->sizeof_SP; - offset_SP = layout->offset_SP; - typeof_SP = sizeof_SP==4 ? 
Ity_I32 : Ity_I64; + UInt sizeof_SP = layout->sizeof_SP; + UInt offset_SP = layout->offset_SP; + IRType typeof_SP = sizeof_SP==4 ? Ity_I32 : Ity_I64; vg_assert(sizeof_SP == 4 || sizeof_SP == 8); /* --- Start of #defines --- */ @@ -351,7 +350,7 @@ IRSB* vg_SP_update_pass ( void* closureV, dcall->fxState[0].nRepeats = 0; \ dcall->fxState[0].repeatLen = 0; \ \ - addStmtToIRSB( bb, IRStmt_Dirty(dcall) ); \ + addStmtToIRStmtVec(out, IRStmt_Dirty(dcall)); \ \ vg_assert(syze > 0); \ update_SP_aliases(syze); \ @@ -381,7 +380,7 @@ IRSB* vg_SP_update_pass ( void* closureV, dcall->fxState[0].nRepeats = 0; \ dcall->fxState[0].repeatLen = 0; \ \ - addStmtToIRSB( bb, IRStmt_Dirty(dcall) ); \ + addStmtToIRStmtVec(out, IRStmt_Dirty(dcall) ); \ \ vg_assert(syze > 0); \ update_SP_aliases(-(syze)); \ @@ -392,11 +391,11 @@ IRSB* vg_SP_update_pass ( void* closureV, /* --- End of #defines --- */ + // TODO-JIT: can we move this to vg_SP_update_pass? clear_SP_aliases(); - for (i = 0; i < sb_in->stmts_used; i++) { - - st = sb_in->stmts[i]; + for (UInt i = 0; i < stmts_in->stmts_used; i++) { + IRStmt* st = stmts_in->stmts[i]; if (st->tag == Ist_IMark) { curr_IP_known = True; @@ -409,9 +408,9 @@ IRSB* vg_SP_update_pass ( void* closureV, if (e->tag != Iex_Get) goto case2; if (e->Iex.Get.offset != offset_SP) goto case2; if (e->Iex.Get.ty != typeof_SP) goto case2; - vg_assert( typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp) == typeof_SP ); + vg_assert( typeOfIRTemp(tyenv, st->Ist.WrTmp.tmp) == typeof_SP ); add_SP_alias(st->Ist.WrTmp.tmp, 0); - addStmtToIRSB( bb, st ); + addStmtToIRStmtVec(out, st); continue; case2: @@ -424,13 +423,13 @@ IRSB* vg_SP_update_pass ( void* closureV, if (e->Iex.Binop.arg2->tag != Iex_Const) goto case3; if (!IS_ADD_OR_SUB(e->Iex.Binop.op)) goto case3; con = GET_CONST(e->Iex.Binop.arg2->Iex.Const.con); - vg_assert( typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp) == typeof_SP ); + vg_assert( typeOfIRTemp(tyenv, st->Ist.WrTmp.tmp) == typeof_SP ); if (IS_ADD(e->Iex.Binop.op)) { 
add_SP_alias(st->Ist.WrTmp.tmp, delta + con); } else { add_SP_alias(st->Ist.WrTmp.tmp, delta - con); } - addStmtToIRSB( bb, st ); + addStmtToIRStmtVec(out, st); continue; case3: @@ -439,9 +438,9 @@ IRSB* vg_SP_update_pass ( void* closureV, e = st->Ist.WrTmp.data; if (e->tag != Iex_RdTmp) goto case4; if (!get_SP_delta(e->Iex.RdTmp.tmp, &delta)) goto case4; - vg_assert( typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp) == typeof_SP ); + vg_assert( typeOfIRTemp(tyenv, st->Ist.WrTmp.tmp) == typeof_SP ); add_SP_alias(st->Ist.WrTmp.tmp, delta); - addStmtToIRSB( bb, st ); + addStmtToIRStmtVec(out, st); continue; case4: @@ -454,7 +453,7 @@ IRSB* vg_SP_update_pass ( void* closureV, last_SP = first_SP + sizeof_SP - 1; first_Put = st->Ist.Put.offset; last_Put = first_Put - + sizeofIRType( typeOfIRExpr( bb->tyenv, st->Ist.Put.data )) + + sizeofIRType(typeOfIRExpr(tyenv, st->Ist.Put.data)) - 1; vg_assert(first_SP <= last_SP); vg_assert(first_Put <= last_Put); @@ -472,31 +471,31 @@ IRSB* vg_SP_update_pass ( void* closureV, put_SP_alias is immediately preceded by an assertion that we are putting in a binding for a correctly-typed temporary. */ - vg_assert( typeOfIRTemp(bb->tyenv, tttmp) == typeof_SP ); + vg_assert( typeOfIRTemp(tyenv, tttmp) == typeof_SP ); /* From the same type-and-offset-correctness argument, if we found a useable alias, it must for an "exact" write of SP. 
*/ vg_assert(first_SP == first_Put); vg_assert(last_SP == last_Put); switch (delta) { - case 0: addStmtToIRSB(bb,st); continue; - case 4: DO_DIE( 4, tttmp); addStmtToIRSB(bb,st); continue; - case -4: DO_NEW( 4, tttmp); addStmtToIRSB(bb,st); continue; - case 8: DO_DIE( 8, tttmp); addStmtToIRSB(bb,st); continue; - case -8: DO_NEW( 8, tttmp); addStmtToIRSB(bb,st); continue; - case 12: DO_DIE( 12, tttmp); addStmtToIRSB(bb,st); continue; - case -12: DO_NEW( 12, tttmp); addStmtToIRSB(bb,st); continue; - case 16: DO_DIE( 16, tttmp); addStmtToIRSB(bb,st); continue; - case -16: DO_NEW( 16, tttmp); addStmtToIRSB(bb,st); continue; - case 32: DO_DIE( 32, tttmp); addStmtToIRSB(bb,st); continue; - case -32: DO_NEW( 32, tttmp); addStmtToIRSB(bb,st); continue; - case 112: DO_DIE( 112, tttmp); addStmtToIRSB(bb,st); continue; - case -112: DO_NEW( 112, tttmp); addStmtToIRSB(bb,st); continue; - case 128: DO_DIE( 128, tttmp); addStmtToIRSB(bb,st); continue; - case -128: DO_NEW( 128, tttmp); addStmtToIRSB(bb,st); continue; - case 144: DO_DIE( 144, tttmp); addStmtToIRSB(bb,st); continue; - case -144: DO_NEW( 144, tttmp); addStmtToIRSB(bb,st); continue; - case 160: DO_DIE( 160, tttmp); addStmtToIRSB(bb,st); continue; - case -160: DO_NEW( 160, tttmp); addStmtToIRSB(bb,st); continue; + case 0: addStmtToIRStmtVec(out,st); continue; + case 4: DO_DIE( 4, tttmp); addStmtToIRStmtVec(out,st); continue; + case -4: DO_NEW( 4, tttmp); addStmtToIRStmtVec(out,st); continue; + case 8: DO_DIE( 8, tttmp); addStmtToIRStmtVec(out,st); continue; + case -8: DO_NEW( 8, tttmp); addStmtToIRStmtVec(out,st); continue; + case 12: DO_DIE( 12, tttmp); addStmtToIRStmtVec(out,st); continue; + case -12: DO_NEW( 12, tttmp); addStmtToIRStmtVec(out,st); continue; + case 16: DO_DIE( 16, tttmp); addStmtToIRStmtVec(out,st); continue; + case -16: DO_NEW( 16, tttmp); addStmtToIRStmtVec(out,st); continue; + case 32: DO_DIE( 32, tttmp); addStmtToIRStmtVec(out,st); continue; + case -32: DO_NEW( 32, tttmp); 
addStmtToIRStmtVec(out,st); continue; + case 112: DO_DIE(112, tttmp); addStmtToIRStmtVec(out,st); continue; + case -112: DO_NEW(112, tttmp); addStmtToIRStmtVec(out,st); continue; + case 128: DO_DIE(128, tttmp); addStmtToIRStmtVec(out,st); continue; + case -128: DO_NEW(128, tttmp); addStmtToIRStmtVec(out,st); continue; + case 144: DO_DIE(144, tttmp); addStmtToIRStmtVec(out,st); continue; + case -144: DO_NEW(144, tttmp); addStmtToIRStmtVec(out,st); continue; + case 160: DO_DIE(160, tttmp); addStmtToIRStmtVec(out,st); continue; + case -160: DO_NEW(160, tttmp); addStmtToIRStmtVec(out,st); continue; default: /* common values for ppc64: 144 128 160 112 176 */ n_SP_updates_generic_known++; @@ -524,9 +523,9 @@ IRSB* vg_SP_update_pass ( void* closureV, generic: /* Pass both the old and new SP values to this helper. Also, pass an origin tag, even if it isn't needed. */ - old_SP = newIRTemp(bb->tyenv, typeof_SP); - addStmtToIRSB( - bb, + old_SP = newIRTemp(tyenv, out, typeof_SP); + addStmtToIRStmtVec( + out, IRStmt_WrTmp( old_SP, IRExpr_Get(offset_SP, typeof_SP) ) ); @@ -552,9 +551,9 @@ IRSB* vg_SP_update_pass ( void* closureV, mkIRExprVec_2( IRExpr_RdTmp(old_SP), st->Ist.Put.data ) ); - addStmtToIRSB( bb, IRStmt_Dirty(dcall) ); + addStmtToIRStmtVec(out, IRStmt_Dirty(dcall)); /* don't forget the original assignment */ - addStmtToIRSB( bb, st ); + addStmtToIRStmtVec(out, st); } else { /* We have a partial update to SP. 
We need to know what the new SP will be, and hand that to the helper call, @@ -569,15 +568,16 @@ IRSB* vg_SP_update_pass ( void* closureV, */ IRTemp new_SP; /* 1 */ - addStmtToIRSB( bb, st ); + addStmtToIRStmtVec(out, st); /* 2 */ - new_SP = newIRTemp(bb->tyenv, typeof_SP); - addStmtToIRSB( - bb, + new_SP = newIRTemp(tyenv, out, typeof_SP); + addStmtToIRStmtVec( + out, IRStmt_WrTmp( new_SP, IRExpr_Get(offset_SP, typeof_SP) ) ); /* 3 */ - addStmtToIRSB( bb, IRStmt_Put(offset_SP, IRExpr_RdTmp(old_SP) )); + addStmtToIRStmtVec(out, + IRStmt_Put(offset_SP, IRExpr_RdTmp(old_SP))); /* 4 */ vg_assert(curr_IP_known); if (NULL != VG_(tdict).track_new_mem_stack_w_ECU) @@ -597,9 +597,10 @@ IRSB* vg_SP_update_pass ( void* closureV, mkIRExprVec_2( IRExpr_RdTmp(old_SP), IRExpr_RdTmp(new_SP) ) ); - addStmtToIRSB( bb, IRStmt_Dirty(dcall) ); + addStmtToIRStmtVec(out, IRStmt_Dirty(dcall)); /* 5 */ - addStmtToIRSB( bb, IRStmt_Put(offset_SP, IRExpr_RdTmp(new_SP) )); + addStmtToIRStmtVec(out, + IRStmt_Put(offset_SP, IRExpr_RdTmp(new_SP))); } /* Forget what we already know. */ @@ -610,7 +611,7 @@ IRSB* vg_SP_update_pass ( void* closureV, if (first_Put == first_SP && last_Put == last_SP && st->Ist.Put.data->tag == Iex_RdTmp) { - vg_assert( typeOfIRTemp(bb->tyenv, st->Ist.Put.data->Iex.RdTmp.tmp) + vg_assert( typeOfIRTemp(tyenv, st->Ist.Put.data->Iex.RdTmp.tmp) == typeof_SP ); add_SP_alias(st->Ist.Put.data->Iex.RdTmp.tmp, 0); } @@ -646,15 +647,27 @@ IRSB* vg_SP_update_pass ( void* closureV, } } - /* well, not interesting. Just copy and keep going. 
*/ - addStmtToIRSB( bb, st ); + if (st->tag == Ist_IfThenElse) { + st = IRStmt_IfThenElse( + st->Ist.IfThenElse.details->cond, + st->Ist.IfThenElse.details->hint, + vg_SP_update_IRStmtVec(closureV, tyenv, + st->Ist.IfThenElse.details->then_leg, + out, layout, vge, vai, gWordTy, hWordTy), + vg_SP_update_IRStmtVec(closureV, tyenv, + st->Ist.IfThenElse.details->else_leg, + out, layout, vge, vai, gWordTy, hWordTy), + st->Ist.IfThenElse.details->phi_nodes); + } - } /* for (i = 0; i < sb_in->stmts_used; i++) */ + /* Well, not interesting. Just copy and keep going. */ + addStmtToIRStmtVec(out, st); + } /* for (UInt i = 0; i < stmts_in->stmts_used; i++) */ - return bb; + return out; complain: - VG_(core_panic)("vg_SP_update_pass: PutI or Dirty which overlaps SP"); + VG_(core_panic)("vg_SP_update_IRStmtVec: PutI or Dirty which overlaps SP"); #undef IS_ADD #undef IS_SUB @@ -664,6 +677,29 @@ IRSB* vg_SP_update_pass ( void* closureV, #undef DO_DIE } +/* See vg_SP_update_IRStmtVec() for detailed explanation. */ +static +IRSB* vg_SP_update_pass(void* closureV, + IRSB* sb_in, + const VexGuestLayout* layout, + const VexGuestExtents* vge, + const VexArchInfo* vai, + IRType gWordTy, + IRType hWordTy) +{ + /* Set up BB */ + IRSB* bb = emptyIRSB(); + bb->tyenv = deepCopyIRTypeEnv(sb_in->tyenv); + bb->id_seq = sb_in->id_seq; + bb->next = deepCopyIRExpr(sb_in->next); + bb->jumpkind = sb_in->jumpkind; + bb->offsIP = sb_in->offsIP; + + bb->stmts = vg_SP_update_IRStmtVec(closureV, bb->tyenv, sb_in->stmts, NULL, + layout, vge, vai, gWordTy, hWordTy); + return bb; +} + /*------------------------------------------------------------*/ /*--- Main entry point for the JITter. 
---*/ /*------------------------------------------------------------*/ @@ -999,11 +1035,11 @@ static IRExpr* mkU32 ( UInt n ) { static IRExpr* mkU8 ( UChar n ) { return IRExpr_Const(IRConst_U8(n)); } -static IRExpr* narrowTo32 ( IRTypeEnv* tyenv, IRExpr* e ) { - if (typeOfIRExpr(tyenv, e) == Ity_I32) { +static IRExpr* narrowTo32 ( IRStmtVec* stmts, IRExpr* e ) { + if (typeOfIRExpr(stmts, e) == Ity_I32) { return e; } else { - vg_assert(typeOfIRExpr(tyenv, e) == Ity_I64); + vg_assert(typeOfIRExpr(stmts, e) == Ity_I64); return IRExpr_Unop(Iop_64to32, e); } } @@ -1056,7 +1092,7 @@ static void gen_PUSH ( IRSB* bb, IRExpr* e ) t1 = newIRTemp( bb->tyenv, ty_Word ); one = mkU(1); - vg_assert(typeOfIRExpr(bb->tyenv, e) == ty_Word); + vg_assert(typeOfIRExpr(bb->stmts, e) == ty_Word); /* t1 = guest_REDIR_SP + 1 */ addStmtToIRSB( @@ -1102,7 +1138,7 @@ static void gen_PUSH ( IRSB* bb, IRExpr* e ) addStmtToIRSB( bb, IRStmt_PutI(mkIRPutI(descr, - narrowTo32(bb->tyenv,IRExpr_RdTmp(t1)), 0, e))); + narrowTo32(bb->stmts, IRExpr_RdTmp(t1)), 0, e))); } @@ -1182,7 +1218,7 @@ static IRTemp gen_POP ( IRSB* bb ) bb, IRStmt_WrTmp( res, - IRExpr_GetI(descr, narrowTo32(bb->tyenv,IRExpr_RdTmp(t1)), 0) + IRExpr_GetI(descr, narrowTo32(bb->stmts, IRExpr_RdTmp(t1)), 0) ) ); @@ -1325,8 +1361,8 @@ Bool mk_preamble__set_NRADDR_to_zero ( void* closureV, IRSB* bb ) = sizeof(((VexGuestArchState*)0)->guest_NRADDR); vg_assert(nraddr_szB == 4 || nraddr_szB == 8); vg_assert(nraddr_szB == VG_WORDSIZE); - addStmtToIRSB( - bb, + addStmtToIRStmtVec( + bb->stmts, IRStmt_Put( offsetof(VexGuestArchState,guest_NRADDR), nraddr_szB == 8 ? mkU64(0) : mkU32(0) @@ -1383,8 +1419,8 @@ Bool mk_preamble__set_NRADDR_to_nraddr ( void* closureV, IRSB* bb ) = sizeof(((VexGuestArchState*)0)->guest_NRADDR); vg_assert(nraddr_szB == 4 || nraddr_szB == 8); vg_assert(nraddr_szB == VG_WORDSIZE); - addStmtToIRSB( - bb, + addStmtToIRStmtVec( + bb->stmts, IRStmt_Put( offsetof(VexGuestArchState,guest_NRADDR), nraddr_szB == 8 |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:27:31
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=a972237d6c9d24e1ead6223cab55c6799aea0ddf commit a972237d6c9d24e1ead6223cab55c6799aea0ddf Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 11:16:28 2017 +0200 Make VEX/useful/test_main.c compile under new rules. Diff: --- VEX/useful/test_main.c | 55 +++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/VEX/useful/test_main.c b/VEX/useful/test_main.c index 2d24aaf..3637064 100644 --- a/VEX/useful/test_main.c +++ b/VEX/useful/test_main.c @@ -165,7 +165,7 @@ int main ( int argc, char** argv ) assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF); for (i = 0; i < orig_nbytes; i++) { assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u)); - origbuf[18+ i] = (UChar)u; + origbuf[i] = (UChar)u; } /* FIXME: put sensible values into the .hwcaps fields */ @@ -198,7 +198,7 @@ int main ( int argc, char** argv ) /* ----- Set up args for LibVEX_Translate ----- */ vta.abiinfo_both = vbi; - vta.guest_bytes = &origbuf[18]; + vta.guest_bytes = &origbuf[0]; vta.guest_bytes_addr = orig_addr; vta.callback_opaque = NULL; vta.chase_into_ok = chase_into_not_ok; @@ -219,13 +219,13 @@ int main ( int argc, char** argv ) vta.arch_host = VexArchAMD64; vta.archinfo_host = vai_amd64; #endif -#if 0 /* x86 -> x86 */ +#if 1 /* x86 -> x86 */ vta.arch_guest = VexArchX86; vta.archinfo_guest = vai_x86; vta.arch_host = VexArchX86; vta.archinfo_host = vai_x86; #endif -#if 1 /* x86 -> mips32 */ +#if 0 /* x86 -> mips32 */ vta.arch_guest = VexArchX86; vta.archinfo_guest = vai_x86; vta.arch_host = VexArchMIPS32; @@ -612,7 +612,7 @@ static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig ) tl_assert(orig < mce->n_originalTmps); if (mce->tmpMap[orig] == IRTemp_INVALID) { mce->tmpMap[orig] - = newIRTemp(mce->bb->tyenv, + = newIRTemp(mce->bb->tyenv, mce->bb->stmts, shadowType(mce->bb->tyenv->types[orig])); } return mce->tmpMap[orig]; @@ -628,7 +628,7 @@ static void newShadowTmp ( MCEnv* mce, IRTemp orig ) { 
tl_assert(orig < mce->n_originalTmps); mce->tmpMap[orig] - = newIRTemp(mce->bb->tyenv, + = newIRTemp(mce->bb->tyenv, mce->bb->stmts, shadowType(mce->bb->tyenv->types[orig])); } @@ -726,11 +726,11 @@ static IRExpr* definedOfType ( IRType ty ) { /* assign value to tmp */ #define assign(_bb,_tmp,_expr) \ - addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr))) + addStmtToIRStmtVec((_bb->stmts), IRStmt_WrTmp((_tmp),(_expr))) /* add stmt to a bb */ #define stmt(_bb,_stmt) \ - addStmtToIRSB((_bb), (_stmt)) + addStmtToIRStmtVec((_bb->stmts), (_stmt)) /* build various kinds of expressions */ #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2)) @@ -746,7 +746,7 @@ static IRExpr* definedOfType ( IRType ty ) { temporary. This effectively converts an arbitrary expression into an atom. */ static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) { - IRTemp t = newIRTemp(mce->bb->tyenv, ty); + IRTemp t = newIRTemp(mce->bb->tyenv, mce->bb->stmts, ty); assign(mce->bb, t, e); return mkexpr(t); } @@ -2096,7 +2096,7 @@ IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias ) /* We need to have a place to park the V bits we're just about to read. 
*/ - datavbits = newIRTemp(mce->bb->tyenv, ty); + datavbits = newIRTemp(mce->bb->tyenv, mce->bb->stmts, ty); di = unsafeIRDirty_1_N( datavbits, 1/*regparms*/, hname, helper, mkIRExprVec_1( addrAct )); @@ -2605,7 +2605,7 @@ static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st ) return isBogusAtom(st->Ist.Exit.guard); default: unhandled: - ppIRStmt(st); + ppIRStmt(st, NULL, 0); VG_(tool_panic)("hasBogusLiterals"); } } @@ -2619,13 +2619,12 @@ IRSB* mc_instrument ( void* closureV, /* Bool hasBogusLiterals = False; */ - Int i, j, first_stmt; - IRStmt* st; MCEnv mce; /* Set up BB */ IRSB* bb = emptyIRSB(); bb->tyenv = deepCopyIRTypeEnv(bb_in->tyenv); + bb->id_seq = bb_in->id_seq; bb->next = deepCopyIRExpr(bb_in->next); bb->jumpkind = bb_in->jumpkind; @@ -2633,20 +2632,20 @@ IRSB* mc_instrument ( void* closureV, along. */ mce.bb = bb; mce.layout = layout; - mce.n_originalTmps = bb->tyenv->types_used; + mce.n_originalTmps = bb->tyenv->used; mce.hWordTy = hWordTy; mce.tmpMap = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp)); - for (i = 0; i < mce.n_originalTmps; i++) + for (UInt i = 0; i < mce.n_originalTmps; i++) mce.tmpMap[i] = IRTemp_INVALID; + tl_assert(isFlatIRSB(bb_in)); + /* Iterate over the stmts. 
*/ - for (i = 0; i < bb_in->stmts_used; i++) { - st = bb_in->stmts[i]; + for (UInt i = 0; i < bb_in->stmts->stmts_used; i++) { + IRStmt* st = bb_in->stmts->stmts[i]; if (!st) continue; - tl_assert(isFlatIRStmt(st)); - /* if (!hasBogusLiterals) { hasBogusLiterals = checkForBogusLiterals(st); @@ -2657,10 +2656,10 @@ IRSB* mc_instrument ( void* closureV, } } */ - first_stmt = bb->stmts_used; + UInt first_stmt = bb->stmts->stmts_used; if (verboze) { - ppIRStmt(st); + ppIRStmt(st, bb->tyenv, 0); VG_(printf)("\n\n"); } @@ -2707,27 +2706,27 @@ IRSB* mc_instrument ( void* closureV, default: VG_(printf)("\n"); - ppIRStmt(st); + ppIRStmt(st, bb->tyenv, 0); VG_(printf)("\n"); VG_(tool_panic)("memcheck: unhandled IRStmt"); } /* switch (st->tag) */ if (verboze) { - for (j = first_stmt; j < bb->stmts_used; j++) { + for (UInt j = first_stmt; j < bb->stmts->stmts_used; j++) { VG_(printf)(" "); - ppIRStmt(bb->stmts[j]); + ppIRStmt(bb->stmts->stmts[j], bb->tyenv, 0); VG_(printf)("\n"); } VG_(printf)("\n"); } - addStmtToIRSB(bb, st); + addStmtToIRStmtVec(bb->stmts, st); } /* Now we need to complain if the jump target is undefined. */ - first_stmt = bb->stmts_used; + UInt first_stmt = bb->stmts->stmts_used; if (verboze) { VG_(printf)("bb->next = "); @@ -2738,9 +2737,9 @@ IRSB* mc_instrument ( void* closureV, complainIfUndefined( &mce, bb->next ); if (verboze) { - for (j = first_stmt; j < bb->stmts_used; j++) { + for (UInt j = first_stmt; j < bb->stmts->stmts_used; j++) { VG_(printf)(" "); - ppIRStmt(bb->stmts[j]); + ppIRStmt(bb->stmts->stmts[j], bb->tyenv, 0); VG_(printf)("\n"); } VG_(printf)("\n"); |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:27:26
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=56e13af11ede1e40b4212bfc79eb822f66167c7b commit 56e13af11ede1e40b4212bfc79eb822f66167c7b Author: Ivo Raisr <iv...@iv...> Date: Mon Aug 28 23:31:03 2017 +0200 Support If-Then-Else and Phi nodes in VEX/priv/main_main.c. Diff: --- VEX/priv/ir_opt.c | 46 ----------------------------- VEX/priv/ir_opt.h | 3 -- VEX/priv/main_main.c | 81 ++++++++++++++++++++++++++-------------------------- 3 files changed, 41 insertions(+), 89 deletions(-) diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c index e0c0fcf..a16bdd9 100644 --- a/VEX/priv/ir_opt.c +++ b/VEX/priv/ir_opt.c @@ -6286,52 +6286,6 @@ static IRStmtVec* atbSubst_StmtVec( /*---------------------------------------------------------------*/ -/*--- The phi nodes deconstruction ---*/ -/*---------------------------------------------------------------*/ - -/* This isn't part of IR optimisation however this pass is needed before IRSB - is handed to instruction selection phase. Deconstructs all phi nodes. - Consider this example: - t2 = phi(t1,t0) - which gets trivially deconstructed into statements appended to: - - then leg: - t2 = t1 - - else leg: - t2 = t0 - - Such an IRSB no longer holds SSA property after this pass but subsequent - phases do no require it. 
*/ -static void deconstruct_phi_nodes_IRStmtVec(IRStmtVec* stmts) -{ - for (UInt i = 0; i < stmts->stmts_used; i++) { - IRStmt* st = stmts->stmts[i]; - if (st->tag != Ist_IfThenElse) { - continue; - } - - IRIfThenElse* ite = st->Ist.IfThenElse.details; - IRStmtVec* then_leg = ite->then_leg; - IRStmtVec* else_leg = ite->else_leg; - for (UInt j = 0; j < ite->phi_nodes->phis_used; j++) { - const IRPhi* phi = ite->phi_nodes->phis[j]; - addStmtToIRStmtVec(then_leg, IRStmt_WrTmp(phi->dst, - IRExpr_RdTmp(phi->srcThen))); - addStmtToIRStmtVec(else_leg, IRStmt_WrTmp(phi->dst, - IRExpr_RdTmp(phi->srcElse))); - } - - deconstruct_phi_nodes_IRStmtVec(then_leg); - deconstruct_phi_nodes_IRStmtVec(else_leg); - } -} - -void deconstruct_phi_nodes(IRSB *irsb) -{ - deconstruct_phi_nodes_IRStmtVec(irsb->stmts); -} - - -/*---------------------------------------------------------------*/ /*--- MSVC specific transformation hacks ---*/ /*---------------------------------------------------------------*/ diff --git a/VEX/priv/ir_opt.h b/VEX/priv/ir_opt.h index 7a8d05f..f67ed00 100644 --- a/VEX/priv/ir_opt.h +++ b/VEX/priv/ir_opt.h @@ -72,9 +72,6 @@ Addr ado_treebuild_BB ( VexRegisterUpdates pxControl ); -/* Deconstructs phi nodes. IRSB is modified and no longer holds SSA propery. 
*/ -extern void deconstruct_phi_nodes(IRSB* bb); - #endif /* ndef __VEX_IR_OPT_H */ /*---------------------------------------------------------------*/ diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index b27d6ca..8a90755 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -320,7 +320,6 @@ IRSB* LibVEX_FrontEnd ( /*MOD*/ VexTranslateArgs* vta, VexGuestLayout* guest_layout; IRSB* irsb; - Int i; Int offB_CMSTART, offB_CMLEN, offB_GUEST_IP, szB_GUEST_IP; IRType guest_word_type; IRType host_word_type; @@ -589,7 +588,7 @@ IRSB* LibVEX_FrontEnd ( /*MOD*/ VexTranslateArgs* vta, vassert(vta->guest_extents->n_used >= 1 && vta->guest_extents->n_used <= 3); vassert(vta->guest_extents->base[0] == vta->guest_bytes_addr); - for (i = 0; i < vta->guest_extents->n_used; i++) { + for (UInt i = 0; i < vta->guest_extents->n_used; i++) { vassert(vta->guest_extents->len[i] < 10000); /* sanity */ } @@ -608,7 +607,7 @@ IRSB* LibVEX_FrontEnd ( /*MOD*/ VexTranslateArgs* vta, UInt guest_bytes_read = (UInt)vta->guest_extents->len[0]; vex_printf("GuestBytes %lx %u ", vta->guest_bytes_addr, guest_bytes_read ); - for (i = 0; i < guest_bytes_read; i++) { + for (UInt i = 0; i < guest_bytes_read; i++) { UInt b = (UInt)p[i]; vex_printf(" %02x", b ); sum = (sum << 1) ^ b; @@ -632,8 +631,9 @@ IRSB* LibVEX_FrontEnd ( /*MOD*/ VexTranslateArgs* vta, // the output of the front end, and iropt never screws up the IR by // itself, unless it is being hacked on. So remove this post-iropt // check in "production" use. - // sanityCheckIRSB( irsb, "after initial iropt", - // True/*must be flat*/, guest_word_type ); + /* TODO-JIT: remove for "production" use. */ + sanityCheckIRSB(irsb, "after initial iropt", + True/*must be flat*/, guest_word_type); if (vex_traceflags & VEX_TRACE_OPT1) { vex_printf("\n------------------------" @@ -672,9 +672,10 @@ IRSB* LibVEX_FrontEnd ( /*MOD*/ VexTranslateArgs* vta, // JRS 2016 Aug 03: as above, this never actually fails in practice. 
// And we'll sanity check anyway after the post-instrumentation // cleanup pass. So skip this check in "production" use. - // if (vta->instrument1 || vta->instrument2) - // sanityCheckIRSB( irsb, "after instrumentation", - // True/*must be flat*/, guest_word_type ); + /* TODO-JIT: remove for "production" use. */ + if (vta->instrument1 || vta->instrument2) + sanityCheckIRSB(irsb, "after instrumentation", + True/*must be flat*/, guest_word_type ); /* Do a post-instrumentation cleanup pass. */ if (vta->instrument1 || vta->instrument2) { @@ -712,13 +713,15 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, Bool (*isMove) ( const HInstr*, HReg*, HReg* ); void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ); void (*mapRegs) ( HRegRemap*, HInstr*, Bool ); + HInstrIfThenElse* (*isIfThenElse)(const HInstr*); void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ); void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ); HInstr* (*genMove) ( HReg, HReg, Bool ); HInstr* (*directReload) ( HInstr*, HReg, Short ); void (*ppInstr) ( const HInstr*, Bool ); + void (*ppCondCode) ( HCondCode ); UInt (*ppReg) ( HReg ); - HInstrArray* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*, + HInstrSB* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*, const VexAbiInfo*, Int, Int, Bool, Bool, Addr ); Int (*emit) ( /*MB_MOD*/Bool*, @@ -730,23 +733,25 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, const RRegUniverse* rRegUniv = NULL; Bool mode64, chainingAllowed; - Int i, j, k, out_used; + Int out_used; Int guest_sizeB; Int offB_HOST_EvC_COUNTER; Int offB_HOST_EvC_FAILADDR; Addr max_ga; UChar insn_bytes[128]; - HInstrArray* vcode; - HInstrArray* rcode; + HInstrSB* vcode; + HInstrSB* rcode; isMove = NULL; getRegUsage = NULL; mapRegs = NULL; + isIfThenElse = NULL; genSpill = NULL; genReload = NULL; genMove = NULL; directReload = NULL; ppInstr = NULL; + ppCondCode = NULL; ppReg = NULL; iselSB = NULL; emit = NULL; @@ -861,11 +866,13 @@ static void libvex_BackEnd 
( const VexTranslateArgs *vta, getRegUsage = CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr); mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr); + isIfThenElse = CAST_TO_TYPEOF(isIfThenElse) X86FN(isIfThenElse_X86Instr); genSpill = CAST_TO_TYPEOF(genSpill) X86FN(genSpill_X86); genReload = CAST_TO_TYPEOF(genReload) X86FN(genReload_X86); genMove = CAST_TO_TYPEOF(genMove) X86FN(genMove_X86); directReload = CAST_TO_TYPEOF(directReload) X86FN(directReload_X86); ppInstr = CAST_TO_TYPEOF(ppInstr) X86FN(ppX86Instr); + ppCondCode = CAST_TO_TYPEOF(ppCondCode) X86FN(ppX86CondCode); ppReg = CAST_TO_TYPEOF(ppReg) X86FN(ppHRegX86); iselSB = X86FN(iselSB_X86); emit = CAST_TO_TYPEOF(emit) X86FN(emit_X86Instr); @@ -1072,21 +1079,16 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, vex_printf("\n"); if (vex_traceflags & VEX_TRACE_VCODE) { - for (i = 0; i < vcode->arr_used; i++) { - vex_printf("%3d ", i); - ppInstr(vcode->arr[i], mode64); - vex_printf("\n"); - } - vex_printf("\n"); + ppHInstrSB(vcode, isIfThenElse, ppInstr, ppCondCode, mode64); } /* Register allocate. 
*/ RegAllocControl con = { .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage, - .mapRegs = mapRegs, .genSpill = genSpill, .genReload = genReload, - .genMove = genMove, .directReload = directReload, - .guest_sizeB = guest_sizeB, .ppInstr = ppInstr, .ppReg = ppReg, - .mode64 = mode64}; + .mapRegs = mapRegs, .isIfThenElse = isIfThenElse, .genSpill = genSpill, + .genReload = genReload, .genMove = genMove, .directReload = directReload, + .guest_sizeB = guest_sizeB, .ppInstr = ppInstr, .ppCondCode = ppCondCode, + .ppReg = ppReg, .mode64 = mode64}; switch (vex_control.regalloc_version) { case 2: rcode = doRegisterAllocation_v2(vcode, &con); @@ -1104,11 +1106,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, vex_printf("\n------------------------" " Register-allocated code " "------------------------\n\n"); - for (i = 0; i < rcode->arr_used; i++) { - vex_printf("%3d ", i); - ppInstr(rcode->arr[i], mode64); - vex_printf("\n"); - } + ppHInstrSB(rcode, isIfThenElse, ppInstr, ppCondCode, mode64); vex_printf("\n"); } @@ -1127,22 +1125,25 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, } out_used = 0; /* tracks along the host_bytes array */ - for (i = 0; i < rcode->arr_used; i++) { - HInstr* hi = rcode->arr[i]; + /* TODO-JIT: This needs another interface when assembler/flattener + is given whole HInstrSB and also pointer to function + which prints emitted bytes. 
*/ + for (UInt i = 0; i < rcode->insns->insns_used; i++) { + HInstr* hi = rcode->insns->insns[i]; Bool hi_isProfInc = False; if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) { ppInstr(hi, mode64); vex_printf("\n"); } - j = emit( &hi_isProfInc, - insn_bytes, sizeof insn_bytes, hi, - mode64, vta->archinfo_host.endness, - vta->disp_cp_chain_me_to_slowEP, - vta->disp_cp_chain_me_to_fastEP, - vta->disp_cp_xindir, - vta->disp_cp_xassisted ); + Int j = emit(&hi_isProfInc, + insn_bytes, sizeof insn_bytes, hi, + mode64, vta->archinfo_host.endness, + vta->disp_cp_chain_me_to_slowEP, + vta->disp_cp_chain_me_to_fastEP, + vta->disp_cp_xindir, + vta->disp_cp_xassisted); if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) { - for (k = 0; k < j; k++) + for (Int k = 0; k < j; k++) vex_printf("%02x ", (UInt)insn_bytes[k]); vex_printf("\n\n"); } @@ -1159,7 +1160,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, res->offs_profInc = out_used; } { UChar* dst = &vta->host_bytes[out_used]; - for (k = 0; k < j; k++) { + for (Int k = 0; k < j; k++) { dst[k] = insn_bytes[k]; } out_used += j; @@ -1173,8 +1174,8 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, if (vex_traceflags) { /* Print the expansion ratio for this SB. */ - j = 0; /* total guest bytes */ - for (i = 0; i < vta->guest_extents->n_used; i++) { + UInt j = 0; /* total guest bytes */ + for (UInt i = 0; i < vta->guest_extents->n_used; i++) { j += vta->guest_extents->len[i]; } if (1) vex_printf("VexExpansionRatio %d %d %d :10\n\n", |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:27:21
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=2412e72067817b60130dcdf57b770a40744d726d commit 2412e72067817b60130dcdf57b770a40744d726d Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 09:47:24 2017 +0200 Support If-Then-Else in x86 isel backend. Diff: --- VEX/priv/host_x86_isel.c | 165 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 142 insertions(+), 23 deletions(-) diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c index 45aafeb..721c159 100644 --- a/VEX/priv/host_x86_isel.c +++ b/VEX/priv/host_x86_isel.c @@ -124,6 +124,19 @@ static Bool isZeroU32 ( IRExpr* e ) // && e->Iex.Const.con->Ico.U64 == 0ULL; //} +static void print_depth(UInt depth) +{ + for (UInt i = 0; i < depth; i++) { + vex_printf(" "); + } +} + +static void print_IRStmt_prefix(UInt depth) +{ + vex_printf("\n"); + print_depth(depth); + vex_printf("-- "); +} /*---------------------------------------------------------*/ /*--- ISelEnv ---*/ @@ -147,7 +160,8 @@ static Bool isZeroU32 ( IRExpr* e ) 32-bit virtual HReg, which holds the high half of the value. - - The code array, that is, the insns selected so far. + - The code vector, that is, the insns selected so far. HInstrVec 'code' + changes according to the current IRStmtVec being processed. - A counter, for generating new virtual registers. @@ -172,11 +186,12 @@ static Bool isZeroU32 ( IRExpr* e ) typedef struct { /* Constant -- are set at the start and do not change. */ - IRTypeEnv* type_env; + HInstrSB* code_sb; + const IRTypeEnv* type_env; HReg* vregmap; HReg* vregmapHI; - Int n_vregmap; + UInt n_vregmap; UInt hwcaps; @@ -184,8 +199,9 @@ typedef Addr32 max_ga; /* These are modified as we go along. 
*/ - HInstrArray* code; - Int vreg_ctr; + HInstrVec* code; + UInt vreg_ctr; + UInt depth; } ISelEnv; @@ -210,6 +226,7 @@ static void addInstr ( ISelEnv* env, X86Instr* instr ) { addHInstr(env->code, instr); if (vex_traceflags & VEX_TRACE_VCODE) { + print_depth(env->depth); ppX86Instr(instr, False); vex_printf("\n"); } @@ -3859,11 +3876,114 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e ) /*--- ISEL: Statements ---*/ /*---------------------------------------------------------*/ +static void iselStmt(ISelEnv* env, IRStmt* stmt); + +static HPhiNode* convertPhiNodes(ISelEnv* env, const IRPhiVec* phi_nodes, + IRIfThenElse_Hint hint, UInt *n_phis) +{ + *n_phis = phi_nodes->phis_used; + HPhiNode* hphis = LibVEX_Alloc_inline(*n_phis * sizeof(HPhiNode)); + + for (UInt i = 0; i < *n_phis; i++) { + const IRPhi* phi = phi_nodes->phis[i]; + hphis[i].dst = lookupIRTemp(env, phi->dst); + + switch (hint) { + case IfThenElse_ThenLikely: + hphis[i].srcFallThrough = lookupIRTemp(env, phi->srcThen); + hphis[i].srcOutOfLine = lookupIRTemp(env, phi->srcElse); + break; + case IfThenElse_ElseLikely: + hphis[i].srcFallThrough = lookupIRTemp(env, phi->srcElse); + hphis[i].srcOutOfLine = lookupIRTemp(env, phi->srcThen); + break; + default: + vassert(0); + } + } + return hphis; +} + +static void iselStmtVec(ISelEnv* env, IRStmtVec* stmts) +{ + for (UInt i = 0; i < stmts->stmts_used; i++) { + IRStmt* st = stmts->stmts[i]; + if (st->tag != Ist_IfThenElse) { + iselStmt(env, stmts->stmts[i]); + continue; + } + + /* Deal with IfThenElse. 
*/ + HInstrVec* current_code = env->code; + IRIfThenElse* ite = st->Ist.IfThenElse.details; + if (vex_traceflags & VEX_TRACE_VCODE) { + print_IRStmt_prefix(env->depth); + ppIRIfThenElseCondHint(ite); + vex_printf(" then {\n"); + } + + UInt n_phis; + HPhiNode* phi_nodes = convertPhiNodes(env, ite->phi_nodes, + ite->hint, &n_phis); + + X86CondCode cc = iselCondCode(env, ite->cond); + /* Note: do not insert any instructions which alter |cc| before it + is consumed by the corresponding branch. */ + HInstrIfThenElse* hite = newHInstrIfThenElse(cc, phi_nodes, n_phis); + X86Instr* instr = X86Instr_IfThenElse(hite); + addInstr(env, instr); + + env->depth += 1; + + IRStmtVec* likely_leg; + IRStmtVec* unlikely_leg; + switch (ite->hint) { + case IfThenElse_ThenLikely: + likely_leg = ite->then_leg; + unlikely_leg = ite->else_leg; + break; + case IfThenElse_ElseLikely: + likely_leg = ite->else_leg; + unlikely_leg = ite->then_leg; + break; + default: + vassert(0); + } + + env->code = hite->fallThrough; + iselStmtVec(env, likely_leg); + if (vex_traceflags & VEX_TRACE_VCODE) { + print_IRStmt_prefix(env->depth - 1); + vex_printf("} else {\n"); + } + env->code = hite->outOfLine; + iselStmtVec(env, unlikely_leg); + + env->depth -= 1; + env->code = current_code; + + if (vex_traceflags & VEX_TRACE_VCODE) { + print_IRStmt_prefix(env->depth); + vex_printf("}\n"); + + for (UInt j = 0; j < hite->n_phis; j++) { + print_IRStmt_prefix(env->depth); + ppIRPhi(ite->phi_nodes->phis[j]); + vex_printf("\n"); + + print_depth(env->depth); + ppHPhiNode(&hite->phi_nodes[j]); + vex_printf("\n"); + } + } + } +} + static void iselStmt ( ISelEnv* env, IRStmt* stmt ) { if (vex_traceflags & VEX_TRACE_VCODE) { - vex_printf("\n-- "); - ppIRStmt(stmt); + print_IRStmt_prefix(env->depth); + ppIRStmt(stmt, env->type_env, 0); vex_printf("\n"); } @@ -4309,7 +4429,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) default: break; } stmt_fail: - ppIRStmt(stmt); + ppIRStmt(stmt, env->type_env, 0); 
vpanic("iselStmt"); } @@ -4419,7 +4539,7 @@ static void iselNext ( ISelEnv* env, /* Translate an entire SB to x86 code. */ -HInstrArray* iselSB_X86 ( const IRSB* bb, +HInstrSB* iselSB_X86 ( const IRSB* bb, VexArch arch_host, const VexArchInfo* archinfo_host, const VexAbiInfo* vbi/*UNUSED*/, @@ -4429,9 +4549,6 @@ HInstrArray* iselSB_X86 ( const IRSB* bb, Bool addProfInc, Addr max_ga ) { - Int i, j; - HReg hreg, hregHI; - ISelEnv* env; UInt hwcaps_host = archinfo_host->hwcaps; X86AMode *amCounter, *amFailAddr; @@ -4448,18 +4565,20 @@ HInstrArray* iselSB_X86 ( const IRSB* bb, vassert(archinfo_host->endness == VexEndnessLE); /* Make up an initial environment to use. */ - env = LibVEX_Alloc_inline(sizeof(ISelEnv)); + ISelEnv* env = LibVEX_Alloc_inline(sizeof(ISelEnv)); env->vreg_ctr = 0; - /* Set up output code array. */ - env->code = newHInstrArray(); + /* Set up output HInstrSB and the first processed HInstrVec. */ + env->code_sb = newHInstrSB(); + env->code = env->code_sb->insns; + env->depth = 0; /* Copy BB's type env. */ env->type_env = bb->tyenv; /* Make up an IRTemp -> virtual HReg mapping. This doesn't change as we go along. */ - env->n_vregmap = bb->tyenv->types_used; + env->n_vregmap = bb->tyenv->used; env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg)); env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg)); @@ -4470,9 +4589,10 @@ HInstrArray* iselSB_X86 ( const IRSB* bb, /* For each IR temporary, allocate a suitably-kinded virtual register. */ - j = 0; - for (i = 0; i < env->n_vregmap; i++) { - hregHI = hreg = INVALID_HREG; + UInt j = 0; + for (UInt i = 0; i < env->n_vregmap; i++) { + HReg hreg = INVALID_HREG; + HReg hregHI = INVALID_HREG; switch (bb->tyenv->types[i]) { case Ity_I1: case Ity_I8: @@ -4505,14 +4625,13 @@ HInstrArray* iselSB_X86 ( const IRSB* bb, } /* Ok, finally we can iterate over the statements. 
*/ - for (i = 0; i < bb->stmts_used; i++) - iselStmt(env, bb->stmts[i]); + iselStmtVec(env, bb->stmts); iselNext(env, bb->next, bb->jumpkind, bb->offsIP); /* record the number of vregs we used. */ - env->code->n_vregs = env->vreg_ctr; - return env->code; + env->code_sb->n_vregs = env->vreg_ctr; + return env->code_sb; } |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:27:15
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=e0f2b3b9d07e278bede972226e1f9b02cdf538a0 commit e0f2b3b9d07e278bede972226e1f9b02cdf538a0 Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 07:03:04 2017 +0200 Introduce HInstrSB into VEX backend headers. Diff: --- VEX/priv/host_amd64_defs.h | 2 +- VEX/priv/host_arm64_defs.h | 2 +- VEX/priv/host_arm_defs.h | 2 +- VEX/priv/host_mips_defs.h | 2 +- VEX/priv/host_ppc_defs.h | 2 +- VEX/priv/host_s390_defs.h | 2 +- VEX/priv/host_x86_defs.c | 23 +++++++++++++++++++++++ VEX/priv/host_x86_defs.h | 11 +++++++++-- 8 files changed, 38 insertions(+), 8 deletions(-) diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h index 8a3eea8..57ef169 100644 --- a/VEX/priv/host_amd64_defs.h +++ b/VEX/priv/host_amd64_defs.h @@ -806,7 +806,7 @@ extern AMD64Instr* directReload_AMD64 ( AMD64Instr* i, extern const RRegUniverse* getRRegUniverse_AMD64 ( void ); -extern HInstrArray* iselSB_AMD64 ( const IRSB*, +extern HInstrSB* iselSB_AMD64 ( const IRSB*, VexArch, const VexArchInfo*, const VexAbiInfo*, diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index e7da4f9..840e0aa 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -1011,7 +1011,7 @@ extern ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool); extern const RRegUniverse* getRRegUniverse_ARM64 ( void ); -extern HInstrArray* iselSB_ARM64 ( const IRSB*, +extern HInstrSB* iselSB_ARM64 ( const IRSB*, VexArch, const VexArchInfo*, const VexAbiInfo*, diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h index 56c4ec5..ec6358e 100644 --- a/VEX/priv/host_arm_defs.h +++ b/VEX/priv/host_arm_defs.h @@ -1074,7 +1074,7 @@ extern ARMInstr* genMove_ARM(HReg from, HReg to, Bool); extern const RRegUniverse* getRRegUniverse_ARM ( void ); -extern HInstrArray* iselSB_ARM ( const IRSB*, +extern HInstrSB* iselSB_ARM ( const IRSB*, VexArch, const VexArchInfo*, const VexAbiInfo*, diff --git a/VEX/priv/host_mips_defs.h b/VEX/priv/host_mips_defs.h 
index a4c0e78..45fff16 100644 --- a/VEX/priv/host_mips_defs.h +++ b/VEX/priv/host_mips_defs.h @@ -704,7 +704,7 @@ extern MIPSInstr* genMove_MIPS(HReg from, HReg to, Bool mode64); extern const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ); -extern HInstrArray *iselSB_MIPS ( const IRSB*, +extern HInstrSB *iselSB_MIPS ( const IRSB*, VexArch, const VexArchInfo*, const VexAbiInfo*, diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h index 6b7fcc8..5cc9a85 100644 --- a/VEX/priv/host_ppc_defs.h +++ b/VEX/priv/host_ppc_defs.h @@ -1219,7 +1219,7 @@ extern PPCInstr* genMove_PPC(HReg from, HReg to, Bool mode64); extern const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ); -extern HInstrArray* iselSB_PPC ( const IRSB*, +extern HInstrSB* iselSB_PPC ( const IRSB*, VexArch, const VexArchInfo*, const VexAbiInfo*, diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index 937829c..41b6ecd 100644 --- a/VEX/priv/host_s390_defs.h +++ b/VEX/priv/host_s390_defs.h @@ -750,7 +750,7 @@ const RRegUniverse *getRRegUniverse_S390( void ); void genSpill_S390 ( HInstr **, HInstr **, HReg , Int , Bool ); void genReload_S390 ( HInstr **, HInstr **, HReg , Int , Bool ); extern s390_insn* genMove_S390(HReg from, HReg to, Bool mode64); -HInstrArray *iselSB_S390 ( const IRSB *, VexArch, const VexArchInfo *, +HInstrSB *iselSB_S390 ( const IRSB *, VexArch, const VexArchInfo *, const VexAbiInfo *, Int, Int, Bool, Bool, Addr); /* Return the number of bytes of code needed for an event check */ diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c index 2e5c044..f4ff049 100644 --- a/VEX/priv/host_x86_defs.c +++ b/VEX/priv/host_x86_defs.c @@ -927,6 +927,13 @@ X86Instr* X86Instr_ProfInc ( void ) { i->tag = Xin_ProfInc; return i; } +X86Instr* X86Instr_IfThenElse(HInstrIfThenElse* hite) +{ + X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr)); + i->tag = Xin_IfThenElse; + i->Xin.IfThenElse.hite = hite; + return i; +} void ppX86Instr ( const X86Instr* i, Bool mode64 
) { vassert(mode64 == False); @@ -1217,11 +1224,20 @@ void ppX86Instr ( const X86Instr* i, Bool mode64 ) { vex_printf("(profInc) addl $1,NotKnownYet; " "adcl $0,NotKnownYet+4"); return; + case Xin_IfThenElse: + vex_printf("if (!%s) then {...", + showX86CondCode(i->Xin.IfThenElse.hite->ccOOL)); + return; default: vpanic("ppX86Instr"); } } +void ppX86CondCode(X86CondCode condCode) +{ + vex_printf("%s", showX86CondCode(condCode)); +} + /* --------- Helpers for register allocation. --------- */ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64) @@ -1702,6 +1718,13 @@ Bool isMove_X86Instr ( const X86Instr* i, HReg* src, HReg* dst ) return False; } +extern HInstrIfThenElse* isIfThenElse_X86Instr(X86Instr* i) +{ + if (UNLIKELY(i->tag == Xin_IfThenElse)) { + return i->Xin.IfThenElse.hite; + } + return NULL; +} /* Generate x86 spill/reload instructions under the direction of the register allocator. Note it's critical these don't write the diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h index 614b751..d32ff98 100644 --- a/VEX/priv/host_x86_defs.h +++ b/VEX/priv/host_x86_defs.h @@ -388,7 +388,8 @@ typedef Xin_SseCMov, /* SSE conditional move */ Xin_SseShuf, /* SSE2 shuffle (pshufd) */ Xin_EvCheck, /* Event check */ - Xin_ProfInc /* 64-bit profile counter increment */ + Xin_ProfInc, /* 64-bit profile counter increment */ + Xin_IfThenElse /* HInstrIfThenElse */ } X86InstrTag; @@ -652,6 +653,9 @@ typedef installed later, post-translation, by patching it in, as it is not known at translation time. 
*/ } ProfInc; + struct { + HInstrIfThenElse* hite; + } IfThenElse; } Xin; } @@ -708,15 +712,18 @@ extern X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ); extern X86Instr* X86Instr_EvCheck ( X86AMode* amCounter, X86AMode* amFailAddr ); extern X86Instr* X86Instr_ProfInc ( void ); +extern X86Instr* X86Instr_IfThenElse(HInstrIfThenElse*); extern void ppX86Instr ( const X86Instr*, Bool ); +extern void ppX86CondCode(X86CondCode); /* Some functions that insulate the register allocator from details of the underlying instruction set. */ extern void getRegUsage_X86Instr ( HRegUsage*, const X86Instr*, Bool ); extern void mapRegs_X86Instr ( HRegRemap*, X86Instr*, Bool ); extern Bool isMove_X86Instr ( const X86Instr*, HReg*, HReg* ); +extern HInstrIfThenElse* isIfThenElse_X86Instr(X86Instr*); extern Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc, UChar* buf, Int nbuf, const X86Instr* i, Bool mode64, @@ -735,7 +742,7 @@ extern X86Instr* directReload_X86 ( X86Instr* i, HReg vreg, Short spill_off ); extern const RRegUniverse* getRRegUniverse_X86 ( void ); -extern HInstrArray* iselSB_X86 ( const IRSB*, +extern HInstrSB* iselSB_X86 ( const IRSB*, VexArch, const VexArchInfo*, const VexAbiInfo*, |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:27:10
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=3bde2a012f0ccc05f5cd6bdf935c669218ba6f52 commit 3bde2a012f0ccc05f5cd6bdf935c669218ba6f52 Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 10:59:08 2017 +0200 Necessary infrastructure to support HInstrVec, HInstrSB into host_generic_regs.c Diff: --- VEX/priv/host_generic_regs.c | 114 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 98 insertions(+), 16 deletions(-) diff --git a/VEX/priv/host_generic_regs.c b/VEX/priv/host_generic_regs.c index 67d2ea2..4d55c5b 100644 --- a/VEX/priv/host_generic_regs.c +++ b/VEX/priv/host_generic_regs.c @@ -313,28 +313,110 @@ HReg lookupHRegRemap ( HRegRemap* map, HReg orig ) /*--- Abstract instructions ---*/ /*---------------------------------------------------------*/ -HInstrArray* newHInstrArray ( void ) +HInstrVec* newHInstrVec(void) { - HInstrArray* ha = LibVEX_Alloc_inline(sizeof(HInstrArray)); - ha->arr_size = 4; - ha->arr_used = 0; - ha->arr = LibVEX_Alloc_inline(ha->arr_size * sizeof(HInstr*)); - ha->n_vregs = 0; - return ha; + HInstrVec* hv = LibVEX_Alloc_inline(sizeof(HInstrVec)); + hv->insns_size = 4; + hv->insns_used = 0; + hv->insns = LibVEX_Alloc_inline(hv->insns_size * sizeof(HInstr*)); + return hv; } __attribute__((noinline)) -void addHInstr_SLOW ( HInstrArray* ha, HInstr* instr ) +void addHInstr_SLOW(HInstrVec* hv, HInstr* instr) { - vassert(ha->arr_used == ha->arr_size); - Int i; - HInstr** arr2 = LibVEX_Alloc_inline(ha->arr_size * 2 * sizeof(HInstr*)); - for (i = 0; i < ha->arr_size; i++) { - arr2[i] = ha->arr[i]; + vassert(hv->insns_used == hv->insns_size); + HInstr** insns2 = LibVEX_Alloc_inline(hv->insns_size * 2 * sizeof(HInstr*)); + for (UInt i = 0; i < hv->insns_size; i++) { + insns2[i] = hv->insns[i]; } - ha->arr_size *= 2; - ha->arr = arr2; - addHInstr(ha, instr); + hv->insns_size *= 2; + hv->insns = insns2; + addHInstr(hv, instr); +} + +HInstrIfThenElse* newHInstrIfThenElse(HCondCode condCode, HPhiNode* phi_nodes, + UInt n_phis) +{ + 
HInstrIfThenElse* hite = LibVEX_Alloc_inline(sizeof(HInstrIfThenElse)); + hite->ccOOL = condCode; + hite->fallThrough = newHInstrVec(); + hite->outOfLine = newHInstrVec(); + hite->phi_nodes = phi_nodes; + hite->n_phis = n_phis; + return hite; +} + +static void print_depth(UInt depth) { + for (UInt i = 0; i < depth; i++) { + vex_printf(" "); + } +} + +void ppHPhiNode(const HPhiNode* phi_node) +{ + ppHReg(phi_node->dst); + vex_printf(" = phi("); + ppHReg(phi_node->srcFallThrough); + vex_printf(","); + ppHReg(phi_node->srcOutOfLine); + vex_printf(")"); +} + +static void ppHInstrVec(const HInstrVec* code, + HInstrIfThenElse* (*isIfThenElse)(const HInstr*), + void (*ppInstr)(const HInstr*, Bool), + void (*ppCondCode)(HCondCode), + Bool mode64, UInt depth, UInt *insn_num) +{ + for (UInt i = 0; i < code->insns_used; i++) { + const HInstr* instr = code->insns[i]; + const HInstrIfThenElse* hite = isIfThenElse(instr); + if (UNLIKELY(hite != NULL)) { + print_depth(depth); + vex_printf(" if (!"); + ppCondCode(hite->ccOOL); + vex_printf(") then fall-through {\n"); + ppHInstrVec(hite->fallThrough, isIfThenElse, ppInstr, ppCondCode, + mode64, depth + 1, insn_num); + print_depth(depth); + vex_printf(" } else out-of-line {\n"); + ppHInstrVec(hite->outOfLine, isIfThenElse, ppInstr, ppCondCode, + mode64, depth + 1, insn_num); + print_depth(depth); + vex_printf(" }\n"); + + for (UInt j = 0; j < hite->n_phis; j++) { + print_depth(depth); + vex_printf(" "); + ppHPhiNode(&hite->phi_nodes[j]); + vex_printf("\n"); + } + } else { + vex_printf("%3u ", (*insn_num)++); + print_depth(depth); + ppInstr(instr, mode64); + vex_printf("\n"); + } + } +} + +HInstrSB* newHInstrSB(void) +{ + HInstrSB* hsb = LibVEX_Alloc_inline(sizeof(HInstrSB)); + hsb->insns = newHInstrVec(); + hsb->n_vregs = 0; + return hsb; +} + +void ppHInstrSB(const HInstrSB* code, + HInstrIfThenElse* (*isIfThenElse)(const HInstr*), + void (*ppInstr)(const HInstr*, Bool), + void (*ppCondCode)(HCondCode), Bool mode64) +{ + UInt 
insn_num = 0; + ppHInstrVec(code->insns, isIfThenElse, ppInstr, ppCondCode, mode64, 0, + &insn_num); } |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:27:05
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=dcd9c1b70c7e2d37fe7761c11459f237fdba9925 commit dcd9c1b70c7e2d37fe7761c11459f237fdba9925 Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 06:57:43 2017 +0200 Introduce HInstrVec, HInstrIfThenElse and HInstrSB into host_generic_regs.h. Diff: --- VEX/priv/host_generic_regs.h | 91 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 21 deletions(-) diff --git a/VEX/priv/host_generic_regs.h b/VEX/priv/host_generic_regs.h index 3db9ea0..bc6f230 100644 --- a/VEX/priv/host_generic_regs.h +++ b/VEX/priv/host_generic_regs.h @@ -359,41 +359,85 @@ static inline void initHRegRemap ( HRegRemap* map ) /* A type is needed to refer to pointers to instructions of any target. Defining it like this means that HInstr* can stand in for X86Instr*, ArmInstr*, etc. */ - typedef void HInstr; -/* An expandable array of HInstr*'s. Handy for insn selection and - register allocation. n_vregs indicates the number of virtual - registers mentioned in the code, something that reg-alloc needs to - know. These are required to be numbered 0 .. n_vregs-1. -*/ +/* An expandable vector of HInstr*'s. Handy for insn selection and + register allocation. */ typedef struct { - HInstr** arr; - Int arr_size; - Int arr_used; - Int n_vregs; + HInstr** insns; + UInt insns_size; + UInt insns_used; } - HInstrArray; + HInstrVec; -extern HInstrArray* newHInstrArray ( void ); +extern HInstrVec* newHInstrVec(void); /* Never call this directly. It's the slow and incomplete path for addHInstr. 
*/ __attribute__((noinline)) -extern void addHInstr_SLOW ( HInstrArray*, HInstr* ); +extern void addHInstr_SLOW(HInstrVec*, HInstr*); -static inline void addHInstr ( HInstrArray* ha, HInstr* instr ) +static inline void addHInstr(HInstrVec* ha, HInstr* instr) { - if (LIKELY(ha->arr_used < ha->arr_size)) { - ha->arr[ha->arr_used] = instr; - ha->arr_used++; + if (LIKELY(ha->insns_used < ha->insns_size)) { + ha->insns[ha->insns_used] = instr; + ha->insns_used++; } else { addHInstr_SLOW(ha, instr); } } +/* Host-independent condition code. Stands for X86CondCode, ARM64CondCode... */ +typedef UInt HCondCode; + + +/* Phi node expressed in terms of HReg's. Analogy to IRPhi. */ +typedef + struct { + HReg dst; + HReg srcFallThrough; + HReg srcOutOfLine; + } + HPhiNode; + +extern void ppHPhiNode(const HPhiNode* phi_node); + + +/* Represents two alternative code paths: + - One more likely taken (hot path ~ fall through) + - One not so likely taken (cold path ~ out of line, OOL) */ +typedef + struct { + HCondCode ccOOL; // condition code for the OOL branch + HInstrVec* fallThrough; // generated from the likely-taken IR + HInstrVec* outOfLine; // generated from likely-not-taken IR + HPhiNode* phi_nodes; + UInt n_phis; + } + HInstrIfThenElse; + +extern HInstrIfThenElse* newHInstrIfThenElse(HCondCode, HPhiNode* phi_nodes, + UInt n_phis); + +/* Code block of HInstr's. + n_vregs indicates the number of virtual registers mentioned in the code, + something that reg-alloc needs to know. These are required to be + numbered 0 .. n_vregs-1. 
*/ +typedef + struct { + HInstrVec* insns; + UInt n_vregs; + } + HInstrSB; + +extern HInstrSB* newHInstrSB(void); +extern void ppHInstrSB(const HInstrSB* code, + HInstrIfThenElse* (*isIfThenElse)(const HInstr*), + void (*ppInstr)(const HInstr*, Bool), + void (*ppCondCode)(HCondCode), Bool mode64); + /*---------------------------------------------------------*/ /*--- C-Call return-location descriptions ---*/ @@ -481,6 +525,10 @@ typedef /* Apply a reg-reg mapping to an insn. */ void (*mapRegs)(HRegRemap*, HInstr*, Bool); + /* Is this instruction actually HInstrIfThenElse? Returns pointer to + HInstrIfThenElse if yes, NULL otherwise. */ + HInstrIfThenElse* (*isIfThenElse) (const HInstr*), + /* Return insn(s) to spill/restore a real register to a spill slot offset. Also a function to move between registers. And optionally a function to do direct reloads. */ @@ -492,6 +540,7 @@ typedef /* For debug printing only. */ void (*ppInstr)(const HInstr*, Bool); + void (*ppCondCode)(HCondCode), UInt (*ppReg)(HReg); /* 32/64bit mode */ @@ -499,12 +548,12 @@ typedef } RegAllocControl; -extern HInstrArray* doRegisterAllocation_v2( - HInstrArray* instrs_in, +extern HInstrSB* doRegisterAllocation_v2( + HInstrSB* sb_in, const RegAllocControl* con ); -extern HInstrArray* doRegisterAllocation_v3( - HInstrArray* instrs_in, +extern HInstrSB* doRegisterAllocation_v3( + HInstrSB* sb_in, const RegAllocControl* con ); |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:26:59
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=3e5b6ae887f419c74a968b320e263edfd72f0890 commit 3e5b6ae887f419c74a968b320e263edfd72f0890 Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 06:40:24 2017 +0200 Introduce an important assertion for a claim from libvex_ir.h. Diff: --- VEX/priv/main_util.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/VEX/priv/main_util.c b/VEX/priv/main_util.c index e9a496b..4ed8866 100644 --- a/VEX/priv/main_util.c +++ b/VEX/priv/main_util.c @@ -123,6 +123,8 @@ void vexAllocSanityCheck ( void ) vassert(IS_WORD_ALIGNED(private_LibVEX_alloc_curr)); vassert(IS_WORD_ALIGNED(private_LibVEX_alloc_last+1)); # undef IS_WORD_ALIGNED + + STATIC_ASSERT(sizeof(IRTemp) == 4); } /* The current allocation mode. */ |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:26:54
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=53d73de7be7713619fcc38a1538fa7cebcf252dc commit 53d73de7be7713619fcc38a1538fa7cebcf252dc Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 10:13:43 2017 +0200 Make guest_x86_toIR.c compile under new rules. Diff: --- VEX/priv/guest_x86_toIR.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c index 9f6a41a..0a38531 100644 --- a/VEX/priv/guest_x86_toIR.c +++ b/VEX/priv/guest_x86_toIR.c @@ -208,7 +208,8 @@ static Addr32 guest_EIP_bbstart; translated. */ static Addr32 guest_EIP_curr_instr; -/* The IRSB* into which we're generating code. */ +/* The IRSB* into which we're generating code. All functions below work + implicitly with the main statement vector held by irsb->stmts. */ static IRSB* irsb; @@ -309,17 +310,18 @@ static IRSB* irsb; #define R_GS 5 -/* Add a statement to the list held by "irbb". */ +/* Add a statement to the main statement vector held by "irbb->stmts". */ static void stmt ( IRStmt* st ) { - addStmtToIRSB( irsb, st ); + addStmtToIRStmtVec(irsb->stmts, st); } -/* Generate a new temporary of the given type. */ +/* Generate a new temporary of the given type. + Works only for the main IRStmtVec #0. */ static IRTemp newTemp ( IRType ty ) { vassert(isPlausibleIRType(ty)); - return newIRTemp( irsb->tyenv, ty ); + return newIRTemp(irsb->tyenv, irsb->stmts, ty); } /* Various simple conversions */ @@ -970,7 +972,7 @@ static void setFlags_DEP1_DEP2_shift ( IROp op32, Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0); vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32); - vassert(guard); + vassert(guard != IRTemp_INVALID); /* Both kinds of right shifts are handled by the same thunk operation. 
*/ @@ -1728,7 +1730,6 @@ IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf ) default: vpanic("disAMode(x86)"); - return 0; /*notreached*/ } } @@ -3432,7 +3433,6 @@ static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) mkU32(8 * sizeofIRType(ty)), unop(Iop_Clz32, mkexpr(src32x)) )); - IRTemp res = newTemp(ty); assign(res, narrowTo(ty, mkexpr(res32))); return res; @@ -8050,8 +8050,6 @@ static IRTemp math_BSWAP ( IRTemp t1, IRType ty ) return t2; } vassert(0); - /*NOTREACHED*/ - return IRTemp_INVALID; } /*------------------------------------------------------------*/ @@ -15501,13 +15499,13 @@ DisResult disInstr_X86 ( IRSB* irsb_IN, guest_EIP_curr_instr = (Addr32)guest_IP; guest_EIP_bbstart = (Addr32)toUInt(guest_IP - delta); - x1 = irsb_IN->stmts_used; + x1 = irsb_IN->stmts->stmts_used; expect_CAS = False; dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn, resteerCisOk, callback_opaque, delta, archinfo, abiinfo, sigill_diag_IN ); - x2 = irsb_IN->stmts_used; + x2 = irsb_IN->stmts->stmts_used; vassert(x2 >= x1); /* See comment at the top of disInstr_X86_WRK for meaning of @@ -15515,7 +15513,7 @@ DisResult disInstr_X86 ( IRSB* irsb_IN, IRCAS as directed by the returned expect_CAS value. */ has_CAS = False; for (i = x1; i < x2; i++) { - if (irsb_IN->stmts[i]->tag == Ist_CAS) + if (irsb_IN->stmts->stmts[i]->tag == Ist_CAS) has_CAS = True; } @@ -15528,8 +15526,7 @@ DisResult disInstr_X86 ( IRSB* irsb_IN, callback_opaque, delta, archinfo, abiinfo, sigill_diag_IN ); for (i = x1; i < x2; i++) { - vex_printf("\t\t"); - ppIRStmt(irsb_IN->stmts[i]); + ppIRStmt(irsb_IN->stmts->stmts[i], irsb_IN->tyenv, 4); vex_printf("\n"); } /* Failure of this assertion is serious and denotes a bug in |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:26:49
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=1112ed45eb9a2c1d210f00232e37603e587bb89a commit 1112ed45eb9a2c1d210f00232e37603e587bb89a Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 09:50:19 2017 +0200 Make guest_generic_bb_to_IR.c compile under new rules. Diff: --- VEX/priv/guest_generic_bb_to_IR.c | 57 +++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/VEX/priv/guest_generic_bb_to_IR.c b/VEX/priv/guest_generic_bb_to_IR.c index 6df594d..64f324b 100644 --- a/VEX/priv/guest_generic_bb_to_IR.c +++ b/VEX/priv/guest_generic_bb_to_IR.c @@ -271,9 +271,9 @@ IRSB* bb_to_IR ( each). We won't know until later the extents and checksums of the areas, if any, that need to be checked. */ nop = IRStmt_NoOp(); - selfcheck_idx = irsb->stmts_used; + selfcheck_idx = irsb->stmts->stmts_used; for (i = 0; i < 3 * 5; i++) - addStmtToIRSB( irsb, nop ); + addStmtToIRStmtVec(irsb->stmts, nop); /* If the caller supplied a function to add its own preamble, use it now. */ @@ -322,7 +322,7 @@ IRSB* bb_to_IR ( /* This is the irsb statement array index of the first stmt in this insn. That will always be the instruction-mark descriptor. */ - first_stmt_idx = irsb->stmts_used; + first_stmt_idx = irsb->stmts->stmts_used; /* Add an instruction-mark statement. We won't know until after disassembling the instruction how long it instruction is, so @@ -341,19 +341,19 @@ IRSB* bb_to_IR ( libvex_guest_arm.h. */ if (arch_guest == VexArchARM && (guest_IP_curr_instr & 1)) { /* Thumb insn => mask out the T bit, but put it in delta */ - addStmtToIRSB( irsb, - IRStmt_IMark(guest_IP_curr_instr & ~(Addr)1, - 0, /* len */ - 1 /* delta */ - ) + addStmtToIRStmtVec(irsb->stmts, + IRStmt_IMark(guest_IP_curr_instr & ~(Addr)1, + 0, /* len */ + 1 /* delta */ + ) ); } else { /* All other targets: store IP as-is, and set delta to zero. 
*/ - addStmtToIRSB( irsb, - IRStmt_IMark(guest_IP_curr_instr, - 0, /* len */ - 0 /* delta */ - ) + addStmtToIRStmtVec(irsb->stmts, + IRStmt_IMark(guest_IP_curr_instr, + 0, /* len */ + 0 /* delta */ + ) ); } @@ -408,18 +408,17 @@ IRSB* bb_to_IR ( } /* Fill in the insn-mark length field. */ - vassert(first_stmt_idx >= 0 && first_stmt_idx < irsb->stmts_used); - imark = irsb->stmts[first_stmt_idx]; + vassert(first_stmt_idx >= 0 && first_stmt_idx < irsb->stmts->stmts_used); + imark = irsb->stmts->stmts[first_stmt_idx]; vassert(imark); vassert(imark->tag == Ist_IMark); vassert(imark->Ist.IMark.len == 0); imark->Ist.IMark.len = dres.len; /* Print the resulting IR, if needed. */ - if (vex_traceflags & VEX_TRACE_FE) { - for (i = first_stmt_idx; i < irsb->stmts_used; i++) { - vex_printf(" "); - ppIRStmt(irsb->stmts[i]); + if (debug_print) { + for (i = first_stmt_idx; i < irsb->stmts->stmts_used; i++) { + ppIRStmt(irsb->stmts->stmts[i], irsb->tyenv, 3); vex_printf("\n"); } } @@ -432,9 +431,9 @@ IRSB* bb_to_IR ( /* Individual insn disassembly must finish the IR for each instruction with an assignment to the guest PC. */ - vassert(first_stmt_idx < irsb->stmts_used); + vassert(first_stmt_idx < irsb->stmts->stmts_used); /* it follows that irsb->stmts_used must be > 0 */ - { IRStmt* st = irsb->stmts[irsb->stmts_used-1]; + { IRStmt* st = irsb->stmts->stmts[irsb->stmts->stmts_used-1]; vassert(st); vassert(st->tag == Ist_Put); vassert(st->Ist.Put.offset == offB_GUEST_IP); @@ -693,8 +692,8 @@ IRSB* bb_to_IR ( the area of guest code to invalidate should we exit with a self-check failure. */ - tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type); - tilen_tmp = newIRTemp(irsb->tyenv, guest_word_type); + tistart_tmp = newIRTemp(irsb->tyenv, irsb->stmts, guest_word_type); + tilen_tmp = newIRTemp(irsb->tyenv, irsb->stmts, guest_word_type); IRConst* base2check_IRConst = guest_word_type==Ity_I32 ? 
IRConst_U32(toUInt(base2check)) @@ -703,16 +702,16 @@ IRSB* bb_to_IR ( = guest_word_type==Ity_I32 ? IRConst_U32(len2check) : IRConst_U64(len2check); - irsb->stmts[selfcheck_idx + i * 5 + 0] + irsb->stmts->stmts[selfcheck_idx + i * 5 + 0] = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(base2check_IRConst) ); - irsb->stmts[selfcheck_idx + i * 5 + 1] + irsb->stmts->stmts[selfcheck_idx + i * 5 + 1] = IRStmt_WrTmp(tilen_tmp, IRExpr_Const(len2check_IRConst) ); - irsb->stmts[selfcheck_idx + i * 5 + 2] + irsb->stmts->stmts[selfcheck_idx + i * 5 + 2] = IRStmt_Put( offB_GUEST_CMSTART, IRExpr_RdTmp(tistart_tmp) ); - irsb->stmts[selfcheck_idx + i * 5 + 3] + irsb->stmts->stmts[selfcheck_idx + i * 5 + 3] = IRStmt_Put( offB_GUEST_CMLEN, IRExpr_RdTmp(tilen_tmp) ); /* Generate the entry point descriptors */ @@ -754,7 +753,7 @@ IRSB* bb_to_IR ( ); } - irsb->stmts[selfcheck_idx + i * 5 + 4] + irsb->stmts->stmts[selfcheck_idx + i * 5 + 4] = IRStmt_Exit( IRExpr_Binop( host_word_type==Ity_I64 ? Iop_CmpNE64 : Iop_CmpNE32, @@ -777,7 +776,7 @@ IRSB* bb_to_IR ( Print it if necessary.*/ vassert(irsb->next != NULL); if (debug_print) { - vex_printf(" "); + vex_printf(" "); vex_printf( "PUT(%d) = ", irsb->offsIP); ppIRExpr( irsb->next ); vex_printf( "; exit-"); |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:26:44
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=f8bdd9ea500f2d7f84bc4c3348c09b40feeb4c4a commit f8bdd9ea500f2d7f84bc4c3348c09b40feeb4c4a Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 06:38:54 2017 +0200 ir_inject.c now plays according to the new rules. Diff: --- VEX/priv/ir_inject.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/VEX/priv/ir_inject.c b/VEX/priv/ir_inject.c index c127aca..249ff30 100644 --- a/VEX/priv/ir_inject.c +++ b/VEX/priv/ir_inject.c @@ -43,7 +43,7 @@ #define binop(kind, a1, a2) IRExpr_Binop(kind, a1, a2) #define triop(kind, a1, a2, a3) IRExpr_Triop(kind, a1, a2, a3) #define qop(kind, a1, a2, a3, a4) IRExpr_Qop(kind, a1, a2, a3, a4) -#define stmt(irsb, st) addStmtToIRSB(irsb, st) +#define stmt(irsb, st) addStmtToIRStmtVec(irsb->stmts, st) /* The IR Injection Control Block. vex_inject_ir will query its contents @@ -188,7 +188,7 @@ store(IRSB *irsb, IREndness endian, HWord haddr, IRExpr *data) /* Inject IR stmts depending on the data provided in the control - block iricb. */ + block iricb. IR statements are injected into main IRStmtVec with ID #0. */ void vex_inject_ir(IRSB *irsb, IREndness endian) { @@ -310,11 +310,14 @@ vex_inject_ir(IRSB *irsb, IREndness endian) if (0) { vex_printf("BEGIN inject\n"); if (iricb.t_result == Ity_I1 || sizeofIRType(iricb.t_result) <= 8) { - ppIRStmt(irsb->stmts[irsb->stmts_used - 1]); + ppIRStmt(irsb->stmts->stmts[irsb->stmts->stmts_used - 1], + irsb->tyenv, 0); } else if (sizeofIRType(iricb.t_result) == 16) { - ppIRStmt(irsb->stmts[irsb->stmts_used - 2]); + ppIRStmt(irsb->stmts->stmts[irsb->stmts->stmts_used - 2], + irsb->tyenv, 0); vex_printf("\n"); - ppIRStmt(irsb->stmts[irsb->stmts_used - 1]); + ppIRStmt(irsb->stmts->stmts[irsb->stmts->stmts_used - 1], + irsb->tyenv, 0); } vex_printf("\nEND inject\n"); } |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:26:43
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=5dc907113bcfaddee364137bf579dd43524858f4 commit 5dc907113bcfaddee364137bf579dd43524858f4 Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 06:31:55 2017 +0200 Support If-Then-Else and Phi nodes in the IR optimizer. Diff: --- VEX/priv/ir_opt.c | 1562 +++++++++++++++++++++++++++++++++-------------------- VEX/priv/ir_opt.h | 3 + 2 files changed, 987 insertions(+), 578 deletions(-) diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c index f40870b..e0c0fcf 100644 --- a/VEX/priv/ir_opt.c +++ b/VEX/priv/ir_opt.c @@ -266,7 +266,7 @@ static void addToHHW ( HashHW* h, HWord key, HWord val ) /* Non-critical helper, heuristic for reducing the number of tmp-tmp copies made by flattening. If in doubt return False. */ -static Bool isFlat ( IRExpr* e ) +static Bool isFlat(const IRExpr* e) { if (e->tag == Iex_Get) return True; @@ -280,102 +280,101 @@ static Bool isFlat ( IRExpr* e ) /* Flatten out 'ex' so it is atomic, returning a new expression with the same value, after having appended extra IRTemp assignments to - the end of 'bb'. */ + the end of 'stmts'. 
*/ -static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex ) +static IRExpr* flatten_Expr(IRTypeEnv* tyenv, IRStmtVec* stmts, IRExpr* ex) { Int i; IRExpr** newargs; - IRType ty = typeOfIRExpr(bb->tyenv, ex); + IRType ty = typeOfIRExpr(tyenv, ex); IRTemp t1; switch (ex->tag) { case Iex_GetI: - t1 = newIRTemp(bb->tyenv, ty); - addStmtToIRSB(bb, IRStmt_WrTmp(t1, + t1 = newIRTemp(tyenv, stmts, ty); + addStmtToIRStmtVec(stmts, IRStmt_WrTmp(t1, IRExpr_GetI(ex->Iex.GetI.descr, - flatten_Expr(bb, ex->Iex.GetI.ix), + flatten_Expr(tyenv, stmts, ex->Iex.GetI.ix), ex->Iex.GetI.bias))); return IRExpr_RdTmp(t1); case Iex_Get: - t1 = newIRTemp(bb->tyenv, ty); - addStmtToIRSB(bb, - IRStmt_WrTmp(t1, ex)); + t1 = newIRTemp(tyenv, stmts, ty); + addStmtToIRStmtVec(stmts, IRStmt_WrTmp(t1, ex)); return IRExpr_RdTmp(t1); case Iex_Qop: { IRQop* qop = ex->Iex.Qop.details; - t1 = newIRTemp(bb->tyenv, ty); - addStmtToIRSB(bb, IRStmt_WrTmp(t1, + t1 = newIRTemp(tyenv, stmts, ty); + addStmtToIRStmtVec(stmts, IRStmt_WrTmp(t1, IRExpr_Qop(qop->op, - flatten_Expr(bb, qop->arg1), - flatten_Expr(bb, qop->arg2), - flatten_Expr(bb, qop->arg3), - flatten_Expr(bb, qop->arg4)))); + flatten_Expr(tyenv, stmts, qop->arg1), + flatten_Expr(tyenv, stmts, qop->arg2), + flatten_Expr(tyenv, stmts, qop->arg3), + flatten_Expr(tyenv, stmts, qop->arg4)))); return IRExpr_RdTmp(t1); } case Iex_Triop: { IRTriop* triop = ex->Iex.Triop.details; - t1 = newIRTemp(bb->tyenv, ty); - addStmtToIRSB(bb, IRStmt_WrTmp(t1, + t1 = newIRTemp(tyenv, stmts, ty); + addStmtToIRStmtVec(stmts, IRStmt_WrTmp(t1, IRExpr_Triop(triop->op, - flatten_Expr(bb, triop->arg1), - flatten_Expr(bb, triop->arg2), - flatten_Expr(bb, triop->arg3)))); + flatten_Expr(tyenv, stmts, triop->arg1), + flatten_Expr(tyenv, stmts, triop->arg2), + flatten_Expr(tyenv, stmts, triop->arg3)))); return IRExpr_RdTmp(t1); } case Iex_Binop: - t1 = newIRTemp(bb->tyenv, ty); - addStmtToIRSB(bb, IRStmt_WrTmp(t1, + t1 = newIRTemp(tyenv, stmts, ty); + addStmtToIRStmtVec(stmts, 
IRStmt_WrTmp(t1, IRExpr_Binop(ex->Iex.Binop.op, - flatten_Expr(bb, ex->Iex.Binop.arg1), - flatten_Expr(bb, ex->Iex.Binop.arg2)))); + flatten_Expr(tyenv, stmts, ex->Iex.Binop.arg1), + flatten_Expr(tyenv, stmts, ex->Iex.Binop.arg2)))); return IRExpr_RdTmp(t1); case Iex_Unop: - t1 = newIRTemp(bb->tyenv, ty); - addStmtToIRSB(bb, IRStmt_WrTmp(t1, + t1 = newIRTemp(tyenv, stmts, ty); + addStmtToIRStmtVec(stmts, IRStmt_WrTmp(t1, IRExpr_Unop(ex->Iex.Unop.op, - flatten_Expr(bb, ex->Iex.Unop.arg)))); + flatten_Expr(tyenv, stmts, ex->Iex.Unop.arg)))); return IRExpr_RdTmp(t1); case Iex_Load: - t1 = newIRTemp(bb->tyenv, ty); - addStmtToIRSB(bb, IRStmt_WrTmp(t1, + t1 = newIRTemp(tyenv, stmts, ty); + addStmtToIRStmtVec(stmts, IRStmt_WrTmp(t1, IRExpr_Load(ex->Iex.Load.end, ex->Iex.Load.ty, - flatten_Expr(bb, ex->Iex.Load.addr)))); + flatten_Expr(tyenv, stmts, ex->Iex.Load.addr)))); return IRExpr_RdTmp(t1); case Iex_CCall: newargs = shallowCopyIRExprVec(ex->Iex.CCall.args); for (i = 0; newargs[i]; i++) - newargs[i] = flatten_Expr(bb, newargs[i]); - t1 = newIRTemp(bb->tyenv, ty); - addStmtToIRSB(bb, IRStmt_WrTmp(t1, + newargs[i] = flatten_Expr(tyenv, stmts, newargs[i]); + t1 = newIRTemp(tyenv, stmts, ty); + addStmtToIRStmtVec(stmts, IRStmt_WrTmp(t1, IRExpr_CCall(ex->Iex.CCall.cee, ex->Iex.CCall.retty, newargs))); return IRExpr_RdTmp(t1); case Iex_ITE: - t1 = newIRTemp(bb->tyenv, ty); - addStmtToIRSB(bb, IRStmt_WrTmp(t1, - IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond), - flatten_Expr(bb, ex->Iex.ITE.iftrue), - flatten_Expr(bb, ex->Iex.ITE.iffalse)))); + t1 = newIRTemp(tyenv, stmts, ty); + addStmtToIRStmtVec(stmts, IRStmt_WrTmp(t1, + IRExpr_ITE(flatten_Expr(tyenv, stmts, ex->Iex.ITE.cond), + flatten_Expr(tyenv, stmts, ex->Iex.ITE.iftrue), + flatten_Expr(tyenv, stmts, ex->Iex.ITE.iffalse)))); return IRExpr_RdTmp(t1); case Iex_Const: /* Lift F64i constants out onto temps so they can be CSEd later. 
*/ if (ex->Iex.Const.con->tag == Ico_F64i) { - t1 = newIRTemp(bb->tyenv, ty); - addStmtToIRSB(bb, IRStmt_WrTmp(t1, + t1 = newIRTemp(tyenv, stmts, ty); + addStmtToIRStmtVec(stmts, IRStmt_WrTmp(t1, IRExpr_Const(ex->Iex.Const.con))); return IRExpr_RdTmp(t1); } else { @@ -394,10 +393,12 @@ static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex ) } } +static IRStmtVec* flatten_IRStmtVec(IRTypeEnv* tyenv, IRStmtVec* in, + IRStmtVec* parent); -/* Append a completely flattened form of 'st' to the end of 'bb'. */ - -static void flatten_Stmt ( IRSB* bb, IRStmt* st ) +/* Append a completely flattened form of 'st' to the end of 'stmts'. */ +static void flatten_Stmt(IRTypeEnv* tyenv, IRStmtVec* stmts, IRStmt* st, + IRStmtVec* parent) { Int i; IRExpr *e1, *e2, *e3, *e4, *e5; @@ -411,69 +412,69 @@ static void flatten_Stmt ( IRSB* bb, IRStmt* st ) if (isIRAtom(st->Ist.Put.data)) { /* optimisation to reduce the amount of heap wasted by the flattener */ - addStmtToIRSB(bb, st); + addStmtToIRStmtVec(stmts, st); } else { /* general case, always correct */ - e1 = flatten_Expr(bb, st->Ist.Put.data); - addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1)); + e1 = flatten_Expr(tyenv, stmts, st->Ist.Put.data); + addStmtToIRStmtVec(stmts, IRStmt_Put(st->Ist.Put.offset, e1)); } break; case Ist_PutI: puti = st->Ist.PutI.details; - e1 = flatten_Expr(bb, puti->ix); - e2 = flatten_Expr(bb, puti->data); + e1 = flatten_Expr(tyenv, stmts, puti->ix); + e2 = flatten_Expr(tyenv, stmts, puti->data); puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2); - addStmtToIRSB(bb, IRStmt_PutI(puti2)); + addStmtToIRStmtVec(stmts, IRStmt_PutI(puti2)); break; case Ist_WrTmp: if (isFlat(st->Ist.WrTmp.data)) { /* optimisation, to reduce the number of tmp-tmp copies generated */ - addStmtToIRSB(bb, st); + addStmtToIRStmtVec(stmts, st); } else { /* general case, always correct */ - e1 = flatten_Expr(bb, st->Ist.WrTmp.data); - addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1)); + e1 = flatten_Expr(tyenv, stmts, 
st->Ist.WrTmp.data); + addStmtToIRStmtVec(stmts, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1)); } break; case Ist_Store: - e1 = flatten_Expr(bb, st->Ist.Store.addr); - e2 = flatten_Expr(bb, st->Ist.Store.data); - addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2)); + e1 = flatten_Expr(tyenv, stmts, st->Ist.Store.addr); + e2 = flatten_Expr(tyenv, stmts, st->Ist.Store.data); + addStmtToIRStmtVec(stmts, IRStmt_Store(st->Ist.Store.end, e1,e2)); break; case Ist_StoreG: sg = st->Ist.StoreG.details; - e1 = flatten_Expr(bb, sg->addr); - e2 = flatten_Expr(bb, sg->data); - e3 = flatten_Expr(bb, sg->guard); - addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3)); + e1 = flatten_Expr(tyenv, stmts, sg->addr); + e2 = flatten_Expr(tyenv, stmts, sg->data); + e3 = flatten_Expr(tyenv, stmts, sg->guard); + addStmtToIRStmtVec(stmts, IRStmt_StoreG(sg->end, e1, e2, e3)); break; case Ist_LoadG: lg = st->Ist.LoadG.details; - e1 = flatten_Expr(bb, lg->addr); - e2 = flatten_Expr(bb, lg->alt); - e3 = flatten_Expr(bb, lg->guard); - addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst, - e1, e2, e3)); + e1 = flatten_Expr(tyenv, stmts, lg->addr); + e2 = flatten_Expr(tyenv, stmts, lg->alt); + e3 = flatten_Expr(tyenv, stmts, lg->guard); + addStmtToIRStmtVec(stmts, IRStmt_LoadG(lg->end, lg->cvt, lg->dst, + e1, e2, e3)); break; case Ist_CAS: cas = st->Ist.CAS.details; - e1 = flatten_Expr(bb, cas->addr); - e2 = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL; - e3 = flatten_Expr(bb, cas->expdLo); - e4 = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL; - e5 = flatten_Expr(bb, cas->dataLo); + e1 = flatten_Expr(tyenv, stmts, cas->addr); + e2 = cas->expdHi ? flatten_Expr(tyenv, stmts, cas->expdHi) : NULL; + e3 = flatten_Expr(tyenv, stmts, cas->expdLo); + e4 = cas->dataHi ? 
flatten_Expr(tyenv, stmts, cas->dataHi) : NULL; + e5 = flatten_Expr(tyenv, stmts, cas->dataLo); cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end, e1, e2, e3, e4, e5 ); - addStmtToIRSB(bb, IRStmt_CAS(cas2)); + addStmtToIRStmtVec(stmts, IRStmt_CAS(cas2)); break; case Ist_LLSC: - e1 = flatten_Expr(bb, st->Ist.LLSC.addr); + e1 = flatten_Expr(tyenv, stmts, st->Ist.LLSC.addr); e2 = st->Ist.LLSC.storedata - ? flatten_Expr(bb, st->Ist.LLSC.storedata) + ? flatten_Expr(tyenv, stmts, st->Ist.LLSC.storedata) : NULL; - addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end, - st->Ist.LLSC.result, e1, e2)); + addStmtToIRStmtVec(stmts, IRStmt_LLSC(st->Ist.LLSC.end, + st->Ist.LLSC.result, e1, e2)); break; case Ist_Dirty: d = st->Ist.Dirty.details; @@ -481,53 +482,72 @@ static void flatten_Stmt ( IRSB* bb, IRStmt* st ) *d2 = *d; d2->args = shallowCopyIRExprVec(d2->args); if (d2->mFx != Ifx_None) { - d2->mAddr = flatten_Expr(bb, d2->mAddr); + d2->mAddr = flatten_Expr(tyenv, stmts, d2->mAddr); } else { vassert(d2->mAddr == NULL); } - d2->guard = flatten_Expr(bb, d2->guard); + d2->guard = flatten_Expr(tyenv, stmts, d2->guard); for (i = 0; d2->args[i]; i++) { IRExpr* arg = d2->args[i]; if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) - d2->args[i] = flatten_Expr(bb, arg); + d2->args[i] = flatten_Expr(tyenv, stmts, arg); } - addStmtToIRSB(bb, IRStmt_Dirty(d2)); + addStmtToIRStmtVec(stmts, IRStmt_Dirty(d2)); break; case Ist_NoOp: case Ist_MBE: case Ist_IMark: - addStmtToIRSB(bb, st); + addStmtToIRStmtVec(stmts, st); break; case Ist_AbiHint: - e1 = flatten_Expr(bb, st->Ist.AbiHint.base); - e2 = flatten_Expr(bb, st->Ist.AbiHint.nia); - addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2)); + e1 = flatten_Expr(tyenv, stmts, st->Ist.AbiHint.base); + e2 = flatten_Expr(tyenv, stmts, st->Ist.AbiHint.nia); + addStmtToIRStmtVec(stmts, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2)); break; case Ist_Exit: - e1 = flatten_Expr(bb, st->Ist.Exit.guard); - addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk, - 
st->Ist.Exit.dst, - st->Ist.Exit.offsIP)); + e1 = flatten_Expr(tyenv, stmts, st->Ist.Exit.guard); + addStmtToIRStmtVec(stmts, IRStmt_Exit(e1, st->Ist.Exit.jk, + st->Ist.Exit.dst, + st->Ist.Exit.offsIP)); + break; + case Ist_IfThenElse: { + IRIfThenElse* ite = st->Ist.IfThenElse.details; + e1 = flatten_Expr(tyenv, stmts, ite->cond); + addStmtToIRStmtVec(stmts, + IRStmt_IfThenElse(e1, ite->hint, + flatten_IRStmtVec(tyenv, ite->then_leg, parent), + flatten_IRStmtVec(tyenv, ite->else_leg, parent), + ite->phi_nodes)); break; + } default: vex_printf("\n"); - ppIRStmt(st); + ppIRStmt(st, tyenv, 0); vex_printf("\n"); vpanic("flatten_Stmt"); } } +static IRStmtVec* flatten_IRStmtVec(IRTypeEnv* tyenv, IRStmtVec* in, + IRStmtVec* parent) +{ + IRStmtVec* out = emptyIRStmtVec(); + out->parent = parent; + out->id = in->id; + out->defset = deepCopyIRTempDefSet(in->defset); + for (UInt i = 0; i < in->stmts_used; i++) { + flatten_Stmt(tyenv, out, in->stmts[i], out); + } + return out; +} static IRSB* flatten_BB ( IRSB* in ) { - Int i; - IRSB* out; - out = emptyIRSB(); - out->tyenv = deepCopyIRTypeEnv( in->tyenv ); - for (i = 0; i < in->stmts_used; i++) - if (in->stmts[i]) - flatten_Stmt( out, in->stmts[i] ); - out->next = flatten_Expr( out, in->next ); + IRSB* out = emptyIRSB(); + out->tyenv = deepCopyIRTypeEnv(in->tyenv); + out->id_seq = in->id_seq; + out->stmts = flatten_IRStmtVec(out->tyenv, in->stmts, NULL); + out->next = flatten_Expr(out->tyenv, out->stmts, in->next); out->jumpkind = in->jumpkind; out->offsIP = in->offsIP; return out; @@ -610,16 +630,16 @@ static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi ) } } - -static void redundant_get_removal_BB ( IRSB* bb ) +static +void redundant_get_removal_IRStmtVec(const IRTypeEnv* tyenv, IRStmtVec* stmts) { HashHW* env = newHHW(); UInt key = 0; /* keep gcc -O happy */ - Int i, j; + Int j; HWord val; - for (i = 0; i < bb->stmts_used; i++) { - IRStmt* st = bb->stmts[i]; + for (UInt i = 0; i < stmts->stmts_used; i++) { + 
IRStmt* st = stmts->stmts[i]; if (st->tag == Ist_NoOp) continue; @@ -640,7 +660,7 @@ static void redundant_get_removal_BB ( IRSB* bb ) be to stick in a reinterpret-style cast, although that would make maintaining flatness more difficult. */ IRExpr* valE = (IRExpr*)val; - Bool typesOK = toBool( typeOfIRExpr(bb->tyenv,valE) + Bool typesOK = toBool( typeOfIRExpr(tyenv, valE) == st->Ist.WrTmp.data->Iex.Get.ty ); if (typesOK && DEBUG_IROPT) { vex_printf("rGET: "); ppIRExpr(get); @@ -648,7 +668,7 @@ static void redundant_get_removal_BB ( IRSB* bb ) vex_printf("\n"); } if (typesOK) - bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE); + stmts->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE); } else { /* Not found, but at least we know that t and the Get(...) are now associated. So add a binding to reflect that @@ -664,7 +684,7 @@ static void redundant_get_removal_BB ( IRSB* bb ) UInt k_lo, k_hi; if (st->tag == Ist_Put) { key = mk_key_GetPut( st->Ist.Put.offset, - typeOfIRExpr(bb->tyenv,st->Ist.Put.data) ); + typeOfIRExpr(tyenv, st->Ist.Put.data) ); } else { vassert(st->tag == Ist_PutI); key = mk_key_GetIPutI( st->Ist.PutI.details->descr ); @@ -700,8 +720,19 @@ static void redundant_get_removal_BB ( IRSB* bb ) addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data)); } - } /* for (i = 0; i < bb->stmts_used; i++) */ + if (st->tag == Ist_IfThenElse) { + /* Consider "then" and "else" legs in isolation. */ + IRIfThenElse* ite = st->Ist.IfThenElse.details; + redundant_get_removal_IRStmtVec(tyenv, ite->then_leg); + redundant_get_removal_IRStmtVec(tyenv, ite->else_leg); + } + + } /* for (UInt i = 0; i < stmts->stmts_used; i++) */ +} +static void redundant_get_removal_BB(IRSB* bb) +{ + redundant_get_removal_IRStmtVec(bb->tyenv, bb->stmts); } @@ -713,8 +744,8 @@ static void redundant_get_removal_BB ( IRSB* bb ) overlapping ranges listed in env. Due to the flattening phase, the only stmt kind we expect to find a Get on is IRStmt_WrTmp. 
*/ -static void handle_gets_Stmt ( - HashHW* env, +static void handle_gets_Stmt ( + HashHW* env, IRStmt* st, Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates), VexRegisterUpdates pxControl @@ -817,9 +848,14 @@ static void handle_gets_Stmt ( case Ist_IMark: break; + case Ist_IfThenElse: + /* Recursing into "then" and "else" branches is done in + redundant_put_removal_IRStmtVec() */ + break; + default: vex_printf("\n"); - ppIRStmt(st); + ppIRStmt(st, NULL, 0); vex_printf("\n"); vpanic("handle_gets_Stmt"); } @@ -882,30 +918,17 @@ static void handle_gets_Stmt ( and loads/stores. */ -static void redundant_put_removal_BB ( - IRSB* bb, +static void redundant_put_removal_IRStmtVec( + IRTypeEnv* tyenv, IRStmtVec* stmts, Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates), - VexRegisterUpdates pxControl - ) + VexRegisterUpdates pxControl, + HashHW* env) { - Int i, j; - Bool isPut; - IRStmt* st; - UInt key = 0; /* keep gcc -O happy */ - - vassert(pxControl < VexRegUpdAllregsAtEachInsn); - - HashHW* env = newHHW(); - - /* Initialise the running env with the fact that the final exit - writes the IP (or, whatever it claims to write. We don't - care.) */ - key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next)); - addToHHW(env, (HWord)key, 0); - /* And now scan backwards through the statements. 
*/ - for (i = bb->stmts_used-1; i >= 0; i--) { - st = bb->stmts[i]; + for (Int i = stmts->stmts_used - 1; i >= 0; i--) { + IRStmt* st = stmts->stmts[i]; + Bool isPut; + UInt key; if (st->tag == Ist_NoOp) continue; @@ -933,7 +956,7 @@ static void redundant_put_removal_BB ( // typeOfIRConst(st->Ist.Exit.dst)); //re_add = lookupHHW(env, NULL, key); /* (2) */ - for (j = 0; j < env->used; j++) + for (UInt j = 0; j < env->used; j++) env->inuse[j] = False; /* (3) */ //if (0 && re_add) @@ -946,7 +969,7 @@ static void redundant_put_removal_BB ( case Ist_Put: isPut = True; key = mk_key_GetPut( st->Ist.Put.offset, - typeOfIRExpr(bb->tyenv,st->Ist.Put.data) ); + typeOfIRExpr(tyenv, st->Ist.Put.data) ); vassert(isIRAtom(st->Ist.Put.data)); break; case Ist_PutI: @@ -968,10 +991,10 @@ static void redundant_put_removal_BB ( /* This Put is redundant because a later one will overwrite it. So NULL (nop) it out. */ if (DEBUG_IROPT) { - vex_printf("rPUT: "); ppIRStmt(st); + vex_printf("rPUT: "); ppIRStmt(st, tyenv, 0); vex_printf("\n"); } - bb->stmts[i] = IRStmt_NoOp(); + stmts->stmts[i] = IRStmt_NoOp(); } else { /* We can't demonstrate that this Put is redundant, so add it to the running collection. */ @@ -986,9 +1009,38 @@ static void redundant_put_removal_BB ( deals with implicit reads of guest state needed to maintain precise exceptions. */ handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl ); + + /* Consider "then" and "else" legs in isolation. They get a new env. 
*/ + if (st->tag == Ist_IfThenElse) { + IRIfThenElse* ite = st->Ist.IfThenElse.details; + redundant_put_removal_IRStmtVec(tyenv, ite->then_leg, preciseMemExnsFn, + pxControl, newHHW()); + redundant_put_removal_IRStmtVec(tyenv, ite->else_leg, preciseMemExnsFn, + pxControl, newHHW()); + } } } +static void redundant_put_removal_BB( + IRSB* bb, + Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates), + VexRegisterUpdates pxControl) +{ + vassert(pxControl < VexRegUpdAllregsAtEachInsn); + + HashHW* env = newHHW(); + + /* Initialise the running env with the fact that the final exit + writes the IP (or, whatever it claims to write. We don't + care.) */ + UInt key = mk_key_GetPut(bb->offsIP, + typeOfIRExpr(bb->tyenv, bb->next)); + addToHHW(env, (HWord)key, 0); + + redundant_put_removal_IRStmtVec(bb->tyenv, bb->stmts, preciseMemExnsFn, + pxControl, env); +} + /*---------------------------------------------------------------*/ /*--- Constant propagation and folding ---*/ @@ -1023,8 +1075,43 @@ static UInt num_nodes_visited; #define NODE_LIMIT 30 -/* The env in this section is a map from IRTemp to IRExpr*, - that is, an array indexed by IRTemp. */ +/* The env in this section is a structure which holds: + - A map from IRTemp to IRExpr*, that is, an array indexed by IRTemp. + Keys are IRTemp.indices. Values are IRExpr*s. + - IR Type Environment + - Current IRStmtVec* which is being constructed. + - A pointer to the parent env (or NULL). */ +typedef + struct _SubstEnv { + IRExpr** map; + IRTypeEnv* tyenv; + IRStmtVec* stmts; + struct _SubstEnv* parent; + } + SubstEnv; + +/* Sets up the substitution environment. + Note that the map is established fresh new for every IRStmtVec (which are + thus considered in isolation). */ +static SubstEnv* newSubstEnv(IRTypeEnv* tyenv, IRStmtVec* stmts_in, + SubstEnv* parent_env) +{ + IRStmtVec* stmts_out = emptyIRStmtVec(); + stmts_out->id = stmts_in->id; + stmts_out->parent = (parent_env != NULL) ? 
parent_env->stmts : NULL; + stmts_out->defset = deepCopyIRTempDefSet(stmts_in->defset); + + SubstEnv* env = LibVEX_Alloc_inline(sizeof(SubstEnv)); + env->tyenv = tyenv; + env->stmts = stmts_out; + env->parent = parent_env; + + UInt n_tmps = tyenv->used; + env->map = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*)); + for (UInt i = 0; i < n_tmps; i++) + env->map[i] = (parent_env == NULL) ? NULL : parent_env->map[i]; + return env; +} /* Do both expressions compute the same value? The answer is generally conservative, i.e. it will report that the expressions do not compute @@ -1043,27 +1130,33 @@ static UInt num_nodes_visited; slower out of line general case. Saves a few insns. */ __attribute__((noinline)) -static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 ); +static Bool sameIRExprs_aux2(const SubstEnv* env, const IRExpr* e1, + const IRExpr* e2); inline -static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) +static Bool sameIRExprs_aux(const SubstEnv* env, const IRExpr* e1, + const IRExpr* e2) { if (e1->tag != e2->tag) return False; return sameIRExprs_aux2(env, e1, e2); } __attribute__((noinline)) -static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) +static Bool sameIRExprs_aux2(const SubstEnv* env, const IRExpr* e1, + const IRExpr* e2) { if (num_nodes_visited++ > NODE_LIMIT) return False; switch (e1->tag) { - case Iex_RdTmp: - if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True; - - if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) { - Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp], - env[e2->Iex.RdTmp.tmp]); + case Iex_RdTmp: { + IRTemp tmp1 = e1->Iex.RdTmp.tmp; + IRTemp tmp2 = e2->Iex.RdTmp.tmp; + + if (tmp1 == tmp2) return True; + const IRExpr* subst1 = env->map[tmp1]; + const IRExpr* subst2 = env->map[tmp2]; + if (subst1 != NULL && subst2 != NULL) { + Bool same = sameIRExprs_aux(env, subst1, subst2); #if STATS_IROPT recursed = True; if (same) recursion_helped = True; @@ -1071,6 +1164,7 @@ static 
Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) return same; } return False; + } case Iex_Get: case Iex_GetI: @@ -1131,7 +1225,7 @@ static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) } inline -static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) +static Bool sameIRExprs(const SubstEnv* env, const IRExpr* e1, const IRExpr* e2) { Bool same; @@ -1160,8 +1254,8 @@ static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0. Bad because it duplicates functionality from typeOfIRExpr. See comment on the single use point below for rationale. */ -static -Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 ) +static Bool +debug_only_hack_sameIRExprs_might_assert(const IRExpr* e1, const IRExpr* e2) { if (e1->tag != e2->tag) return False; switch (e1->tag) { @@ -1179,7 +1273,7 @@ Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 ) /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */ -static Bool isZeroU32 ( IRExpr* e ) +static Bool isZeroU32(const IRExpr* e) { return toBool( e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_U32 @@ -1189,7 +1283,7 @@ static Bool isZeroU32 ( IRExpr* e ) /* Is this literally IRExpr_Const(IRConst_U64(0)) ? Currently unused; commented out to avoid compiler warning */ #if 0 -static Bool isZeroU64 ( IRExpr* e ) +static Bool isZeroU64(const IRExpr* e) { return toBool( e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_U64 @@ -1198,7 +1292,7 @@ static Bool isZeroU64 ( IRExpr* e ) #endif /* Is this literally IRExpr_Const(IRConst_V128(0)) ? */ -static Bool isZeroV128 ( IRExpr* e ) +static Bool isZeroV128(const IRExpr* e) { return toBool( e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_V128 @@ -1206,7 +1300,7 @@ static Bool isZeroV128 ( IRExpr* e ) } /* Is this literally IRExpr_Const(IRConst_V256(0)) ? 
*/ -static Bool isZeroV256 ( IRExpr* e ) +static Bool isZeroV256(const IRExpr* e) { return toBool( e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_V256 @@ -1214,7 +1308,7 @@ static Bool isZeroV256 ( IRExpr* e ) } /* Is this an integer constant with value 0 ? */ -static Bool isZeroU ( IRExpr* e ) +static Bool isZeroU(const IRExpr* e) { if (e->tag != Iex_Const) return False; switch (e->Iex.Const.con->tag) { @@ -1229,7 +1323,7 @@ static Bool isZeroU ( IRExpr* e ) } /* Is this an integer constant with value 1---1b ? */ -static Bool isOnesU ( IRExpr* e ) +static Bool isOnesU(const IRExpr* e) { if (e->tag != Iex_Const) return False; switch (e->Iex.Const.con->tag) { @@ -1346,29 +1440,29 @@ static UInt fold_Clz32 ( UInt value ) return NULL if it can't resolve 'e' to a new expression, which will be the case if 'e' is instead defined by an IRStmt (IRDirty or LLSC). */ -static IRExpr* chase ( IRExpr** env, IRExpr* e ) +static IRExpr* chase(SubstEnv* env, IRExpr* e) { /* Why is this loop guaranteed to terminate? Because all tmps must have definitions before use, hence a tmp cannot be bound (directly or indirectly) to itself. */ while (e->tag == Iex_RdTmp) { if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); } - e = env[(Int)e->Iex.RdTmp.tmp]; + e = env->map[e->Iex.RdTmp.tmp]; if (e == NULL) break; } return e; } /* Similar to |chase|, but follows at most one level of tmp reference. 
*/ -static IRExpr* chase1 ( IRExpr** env, IRExpr* e ) +static IRExpr* chase1(IRExpr* env[], IRExpr* e) { if (e == NULL || e->tag != Iex_RdTmp) return e; else - return env[(Int)e->Iex.RdTmp.tmp]; + return env[e->Iex.RdTmp.tmp]; } -static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e ) +static IRExpr* fold_Expr(SubstEnv* env, IRExpr* e) { Int shift; IRExpr* e2 = e; /* e2 is the result of folding e, if possible */ @@ -2428,13 +2522,12 @@ static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e ) /* Apply the subst to a simple 1-level expression -- guaranteed to be 1-level due to previous flattening pass. */ - -static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex ) +static IRExpr* subst_Expr(SubstEnv* env, IRExpr* ex) { switch (ex->tag) { - case Iex_RdTmp: - if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) { - IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp]; + case Iex_RdTmp: { + IRExpr* rhs = env->map[ex->Iex.RdTmp.tmp]; + if (rhs != NULL) { if (rhs->tag == Iex_RdTmp) return rhs; if (rhs->tag == Iex_Const @@ -2443,6 +2536,7 @@ static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex ) } /* not bound in env */ return ex; + } case Iex_Const: case Iex_Get: @@ -2539,16 +2633,39 @@ static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex ) } } +/* A phi node of the form dst = phi(srcThen, srcElse) behaves much like + a WrTmp. Ensure that the connection between src{Then,Else} and assignments + in the corresponding leg is not broken: + - either add WrTemp assignment for src{Then,Else} in the leg, or + - adjust src{Then,Else} for the phi node. */ +static void subst_and_fold_PhiNodes(SubstEnv* env, IRStmtVec* stmts, + Bool srcThen, IRPhiVec* phi_nodes) +{ + for (UInt i = 0; i < phi_nodes->phis_used; i++) { + IRPhi* phi = phi_nodes->phis[i]; + IRTemp* tmp = (srcThen) ? 
&phi->srcThen : &phi->srcElse; + IRExpr* expr = env->map[*tmp]; + vassert(expr != NULL); + + if (expr->tag == Iex_RdTmp) { + *tmp = expr->Iex.RdTmp.tmp; + } else { + addStmtToIRStmtVec(stmts, IRStmt_WrTmp(*tmp, expr)); + } + } +} + +static IRStmtVec* subst_and_fold_Stmts(SubstEnv* env, IRStmtVec* in); /* Apply the subst to stmt, then fold the result as much as possible. Much simplified due to stmt being previously flattened. As a result of this, the stmt may wind up being turned into a no-op. */ -static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st ) +static IRStmt* subst_and_fold_Stmt(SubstEnv* env, IRStmt* st) { # if 0 vex_printf("\nsubst and fold stmt\n"); - ppIRStmt(st); + ppIRStmt(st, env->tyenv, 0); vex_printf("\n"); # endif @@ -2739,7 +2856,7 @@ static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st ) at this point, which is tricky. Such truncation is done later by the dead-code elimination pass. */ /* fall out into the reconstruct-the-exit code. */ - if (vex_control.iropt_verbosity > 0) + if (vex_control.iropt_verbosity > 0) /* really a misuse of vex_control.iropt_verbosity */ vex_printf("vex iropt: IRStmt_Exit became unconditional\n"); } @@ -2748,51 +2865,80 @@ static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st ) st->Ist.Exit.dst, st->Ist.Exit.offsIP); } + case Ist_IfThenElse: { + IRIfThenElse* ite = st->Ist.IfThenElse.details; + vassert(isIRAtom(ite->cond)); + IRExpr *fcond = fold_Expr(env, subst_Expr(env, ite->cond)); + if (fcond->tag == Iex_Const) { + /* Interesting. The condition on this "if-then-else" has folded down + to a constant. */ + vassert(fcond->Iex.Const.con->tag == Ico_U1); + if (fcond->Iex.Const.con->Ico.U1 == True) { + /* TODO-JIT: "else" leg is never going to happen, so dump it. 
*/ + if (vex_control.iropt_verbosity > 0) + vex_printf("vex iropt: IRStmt_IfThenElse became " + "unconditional\n"); + } else { + vassert(fcond->Iex.Const.con->Ico.U1 == False); + /* TODO-JIT: "then" leg is never going to happen, so dump it. */ + if (vex_control.iropt_verbosity > 0) + vex_printf("vex iropt: IRStmt_IfThenElse became " + "unconditional\n"); + } + /* TODO-JIT: Pull the only remaining leg into the current IRStmtVec. + Here is what needs to be done: + 1. Rewrite ID of all IRTemp's (in tyenv->ids) defined in the + pulled leg. These are tracked in leg's defset. + 2. Insert all statements from the leg in the env->stmts_out + at the current position. */ + vpanic("IfThenElse leg lifting unimplemented"); + } + + SubstEnv* then_env = newSubstEnv(env->tyenv, ite->then_leg, env); + IRStmtVec* then_stmts = subst_and_fold_Stmts(then_env, ite->then_leg); + subst_and_fold_PhiNodes(then_env, then_stmts, True /* srcThen */, + ite->phi_nodes); + + SubstEnv* else_env = newSubstEnv(env->tyenv, ite->else_leg, env); + IRStmtVec* else_stmts = subst_and_fold_Stmts(else_env, ite->else_leg); + subst_and_fold_PhiNodes(else_env, else_stmts, False /* srcThen */, + ite->phi_nodes); + + return IRStmt_IfThenElse(fcond, ite->hint, then_stmts, else_stmts, + ite->phi_nodes); + } + default: - vex_printf("\n"); ppIRStmt(st); + vex_printf("\n"); ppIRStmt(st, env->tyenv, 0); vpanic("subst_and_fold_Stmt"); } } - -IRSB* cprop_BB ( IRSB* in ) +/* Is to be called with already created SubstEnv as per newSubstEnv(). */ +static IRStmtVec* subst_and_fold_Stmts(SubstEnv* env, IRStmtVec* in) { - Int i; - IRSB* out; - IRStmt* st2; - Int n_tmps = in->tyenv->types_used; - IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*)); /* Keep track of IRStmt_LoadGs that we need to revisit after processing all the other statements. 
*/ const Int N_FIXUPS = 16; Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */ Int n_fixups = 0; - out = emptyIRSB(); - out->tyenv = deepCopyIRTypeEnv( in->tyenv ); - - /* Set up the env with which travels forward. This holds a - substitution, mapping IRTemps to IRExprs. The environment - is to be applied as we move along. Keys are IRTemps. - Values are IRExpr*s. - */ - for (i = 0; i < n_tmps; i++) - env[i] = NULL; + IRStmtVec* out = env->stmts; /* For each original SSA-form stmt ... */ - for (i = 0; i < in->stmts_used; i++) { + for (UInt i = 0; i < in->stmts_used; i++) { /* First apply the substitution to the current stmt. This propagates in any constants and tmp-tmp assignments accumulated prior to this point. As part of the subst_Stmt call, also then fold any constant expressions resulting. */ - st2 = in->stmts[i]; + IRStmt* st2 = in->stmts[i]; /* perhaps st2 is already a no-op? */ if (st2->tag == Ist_NoOp) continue; - st2 = subst_and_fold_Stmt( env, st2 ); + st2 = subst_and_fold_Stmt(env, st2); /* Deal with some post-folding special cases. */ switch (st2->tag) { @@ -2806,9 +2952,9 @@ IRSB* cprop_BB ( IRSB* in ) running environment. This is for the benefit of copy propagation and to allow sameIRExpr look through IRTemps. 
*/ - case Ist_WrTmp: { - vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL); - env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data; + case Ist_WrTmp: + vassert(env->map[st2->Ist.WrTmp.tmp] == NULL); + env->map[st2->Ist.WrTmp.tmp] = st2->Ist.WrTmp.data; /* 't1 = t2' -- don't add to BB; will be optimized out */ if (st2->Ist.WrTmp.data->tag == Iex_RdTmp) @@ -2824,7 +2970,6 @@ IRSB* cprop_BB ( IRSB* in ) } /* else add it to the output, as normal */ break; - } case Ist_LoadG: { IRLoadG* lg = st2->Ist.LoadG.details; @@ -2844,7 +2989,7 @@ IRSB* cprop_BB ( IRSB* in ) vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS); if (n_fixups < N_FIXUPS) { fixups[n_fixups++] = out->stmts_used; - addStmtToIRSB( out, IRStmt_NoOp() ); + addStmtToIRStmtVec(out, IRStmt_NoOp()); } } /* And always add the LoadG to the output, regardless. */ @@ -2855,24 +3000,14 @@ IRSB* cprop_BB ( IRSB* in ) break; } - /* Not interesting, copy st2 into the output block. */ - addStmtToIRSB( out, st2 ); + /* Not interesting, copy st2 into the output vector. */ + addStmtToIRStmtVec(out, st2); } -# if STATS_IROPT - vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n", - invocation_count, recursion_count, success_count, - recursion_success_count, max_nodes_visited); -# endif - - out->next = subst_Expr( env, in->next ); - out->jumpkind = in->jumpkind; - out->offsIP = in->offsIP; - /* Process any leftover unconditional LoadGs that we noticed in the main pass. */ vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS); - for (i = 0; i < n_fixups; i++) { + for (UInt i = 0; i < n_fixups; i++) { Int ix = fixups[i]; /* Carefully verify that the LoadG has the expected form. */ vassert(ix >= 0 && ix+1 < out->stmts_used); @@ -2901,7 +3036,7 @@ IRSB* cprop_BB ( IRSB* in ) } /* Replace the placeholder NoOp by the required unconditional load. 
*/ - IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg); + IRTemp tLoaded = newIRTemp(env->tyenv, out, cvtArg); out->stmts[ix] = IRStmt_WrTmp(tLoaded, IRExpr_Load(lg->end, cvtArg, lg->addr)); @@ -2917,6 +3052,26 @@ IRSB* cprop_BB ( IRSB* in ) return out; } +IRSB* cprop_BB ( IRSB* in ) +{ + SubstEnv* env = newSubstEnv(in->tyenv, in->stmts, NULL); + IRSB* out = emptyIRSB(); + out->tyenv = deepCopyIRTypeEnv(in->tyenv); + out->stmts = subst_and_fold_Stmts(env, in->stmts); + out->id_seq = in->id_seq; + out->next = subst_Expr( env, in->next ); + out->jumpkind = in->jumpkind; + out->offsIP = in->offsIP; + +# if STATS_IROPT + vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n", + invocation_count, recursion_count, success_count, + recursion_success_count, max_nodes_visited); +# endif + + return out; +} + /*---------------------------------------------------------------*/ /*--- Dead code (t = E) removal ---*/ @@ -2932,7 +3087,7 @@ IRSB* cprop_BB ( IRSB* in ) inline static void addUses_Temp ( Bool* set, IRTemp tmp ) { - set[(Int)tmp] = True; + set[tmp] = True; } static void addUses_Expr ( Bool* set, IRExpr* e ) @@ -2985,9 +3140,11 @@ static void addUses_Expr ( Bool* set, IRExpr* e ) } } +static void do_deadcode_IRStmtVec(Bool* set, IRStmtVec* stmts, + Int* i_unconditional_exit); + static void addUses_Stmt ( Bool* set, IRStmt* st ) { - Int i; IRDirty* d; IRCAS* cas; switch (st->tag) { @@ -3043,7 +3200,7 @@ static void addUses_Stmt ( Bool* set, IRStmt* st ) if (d->mFx != Ifx_None) addUses_Expr(set, d->mAddr); addUses_Expr(set, d->guard); - for (i = 0; d->args[i] != NULL; i++) { + for (UInt i = 0; d->args[i] != NULL; i++) { IRExpr* arg = d->args[i]; if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) addUses_Expr(set, arg); @@ -3056,9 +3213,26 @@ static void addUses_Stmt ( Bool* set, IRStmt* st ) case Ist_Exit: addUses_Expr(set, st->Ist.Exit.guard); return; + case Ist_IfThenElse: { + IRIfThenElse* ite = st->Ist.IfThenElse.details; + addUses_Expr(set, ite->cond); + + 
for (UInt i = 0; i < ite->phi_nodes->phis_used; i++) { + const IRPhi* phi = ite->phi_nodes->phis[i]; + addUses_Temp(set, phi->srcThen); + addUses_Temp(set, phi->srcElse); + } + + Int i_unconditional_exit; // TODO-JIT: unused at the moment + /* Consider both legs simultaneously. If either of them reports an + IRTemp in use, then it won't be eliminated. */ + do_deadcode_IRStmtVec(set, ite->then_leg, &i_unconditional_exit); + do_deadcode_IRStmtVec(set, ite->else_leg, &i_unconditional_exit); + return; + } default: vex_printf("\n"); - ppIRStmt(st); + ppIRStmt(st, NULL, 0); vpanic("addUses_Stmt"); } } @@ -3095,40 +3269,27 @@ static Bool isOneU1 ( IRExpr* e ) all statements following it are turned into no-ops. */ -/* notstatic */ void do_deadcode_BB ( IRSB* bb ) +static void do_deadcode_IRStmtVec(Bool* set, IRStmtVec* stmts, + Int* i_unconditional_exit) { - Int i, i_unconditional_exit; - Int n_tmps = bb->tyenv->types_used; - Bool* set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool)); - IRStmt* st; - - for (i = 0; i < n_tmps; i++) - set[i] = False; - - /* start off by recording IRTemp uses in the next field. */ - addUses_Expr(set, bb->next); - - /* First pass */ + *i_unconditional_exit = -1; /* Work backwards through the stmts */ - i_unconditional_exit = -1; - for (i = bb->stmts_used-1; i >= 0; i--) { - st = bb->stmts[i]; + for (Int i = stmts->stmts_used - 1; i >= 0; i--) { + IRStmt* st = stmts->stmts[i]; if (st->tag == Ist_NoOp) continue; /* take note of any unconditional exits */ - if (st->tag == Ist_Exit - && isOneU1(st->Ist.Exit.guard)) - i_unconditional_exit = i; - if (st->tag == Ist_WrTmp - && set[(Int)(st->Ist.WrTmp.tmp)] == False) { + if (st->tag == Ist_Exit && isOneU1(st->Ist.Exit.guard)) + *i_unconditional_exit = i; + if (st->tag == Ist_WrTmp && set[st->Ist.WrTmp.tmp] == False) { /* it's an IRTemp which never got used. Delete it. 
*/ if (DEBUG_IROPT) { vex_printf("DEAD: "); - ppIRStmt(st); + ppIRStmt(st, NULL, 0); vex_printf("\n"); } - bb->stmts[i] = IRStmt_NoOp(); + stmts->stmts[i] = IRStmt_NoOp(); } else if (st->tag == Ist_Dirty @@ -3136,30 +3297,44 @@ static Bool isOneU1 ( IRExpr* e ) && isZeroU1(st->Ist.Dirty.details->guard)) { /* This is a dirty helper which will never get called. Delete it. */ - bb->stmts[i] = IRStmt_NoOp(); + stmts->stmts[i] = IRStmt_NoOp(); } else { /* Note any IRTemp uses made by the current statement. */ addUses_Stmt(set, st); } } +} + +void do_deadcode_BB(IRSB* bb) +{ + Int i_unconditional_exit; + UInt n_tmps = bb->tyenv->used; + Bool* set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool)); + for (UInt i = 0; i < n_tmps; i++) + set[i] = False; + + /* start off by recording IRTemp uses in the next field. */ + addUses_Expr(set, bb->next); + + /* First pass */ + do_deadcode_IRStmtVec(set, bb->stmts, &i_unconditional_exit); /* Optional second pass: if any unconditional exits were found, delete them and all following statements. 
*/ - if (i_unconditional_exit != -1) { if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n", i_unconditional_exit); vassert(i_unconditional_exit >= 0 - && i_unconditional_exit < bb->stmts_used); + && i_unconditional_exit < bb->stmts->stmts_used); bb->next - = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst ); + = IRExpr_Const(bb->stmts->stmts[i_unconditional_exit]->Ist.Exit.dst); bb->jumpkind - = bb->stmts[i_unconditional_exit]->Ist.Exit.jk; + = bb->stmts->stmts[i_unconditional_exit]->Ist.Exit.jk; bb->offsIP - = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP; - for (i = i_unconditional_exit; i < bb->stmts_used; i++) - bb->stmts[i] = IRStmt_NoOp(); + = bb->stmts->stmts[i_unconditional_exit]->Ist.Exit.offsIP; + for (UInt i = i_unconditional_exit; i < bb->stmts->stmts_used; i++) + bb->stmts->stmts[i] = IRStmt_NoOp(); } } @@ -3169,19 +3344,22 @@ static Bool isOneU1 ( IRExpr* e ) /*--- collaboration with the front end ---*/ /*---------------------------------------------------------------*/ -static -IRSB* spec_helpers_BB( - IRSB* bb, - IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int) - ) +static void spec_helpers_IRStmtVec( + IRStmtVec* stmts, + IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int), + Bool* any) { - Int i; - IRStmt* st; IRExpr* ex; - Bool any = False; - for (i = bb->stmts_used-1; i >= 0; i--) { - st = bb->stmts[i]; + for (Int i = stmts->stmts_used - 1; i >= 0; i--) { + IRStmt* st = stmts->stmts[i]; + + if (st->tag == Ist_IfThenElse) { + IRIfThenElse* ite = st->Ist.IfThenElse.details; + spec_helpers_IRStmtVec(ite->then_leg, specHelper, any); + spec_helpers_IRStmtVec(ite->else_leg, specHelper, any); + continue; + } if (st->tag != Ist_WrTmp || st->Ist.WrTmp.data->tag != Iex_CCall) @@ -3189,15 +3367,14 @@ IRSB* spec_helpers_BB( ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name, st->Ist.WrTmp.data->Iex.CCall.args, - &bb->stmts[0], i ); + &stmts->stmts[0], i ); if (!ex) /* the front end can't think of a suitable 
replacement */ continue; - /* We got something better. Install it in the bb. */ - any = True; - bb->stmts[i] - = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex); + /* We got something better. Install it in stmts. */ + *any = True; + stmts->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex); if (0) { vex_printf("SPEC: "); @@ -3207,10 +3384,22 @@ IRSB* spec_helpers_BB( vex_printf("\n"); } } +} - if (any) - bb = flatten_BB(bb); - return bb; +static +IRSB* spec_helpers_BB( + IRSB* bb, + IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int) + ) +{ + Bool any = False; + spec_helpers_IRStmtVec(bb->stmts, specHelper, &any); + + if (any) { + return flatten_BB(bb); + } else { + return bb; + } } @@ -3863,18 +4052,10 @@ static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd ) return NULL; } - -/* The BB is modified in-place. Returns True if any changes were - made. The caller can choose whether or not loads should be CSEd. - In the normal course of things we don't do that, since CSEing loads - is something of a dodgy proposition if the guest program is doing - some screwy stuff to do with races and spinloops. */ - -static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd ) +static Bool do_cse_IRStmtVec(const IRTypeEnv* tyenv, IRStmtVec* stmts, + Bool allowLoadsToBeCSEd) { - Int i, j, paranoia; - IRTemp t, q; - IRStmt* st; + Int j, paranoia; AvailExpr* eprime; AvailExpr* ae; Bool invalidate; @@ -3883,10 +4064,6 @@ static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd ) HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */ HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */ - vassert(sizeof(IRTemp) <= sizeof(HWord)); - - if (0) { ppIRSB(bb); vex_printf("\n\n"); } - /* Iterate forwards over the stmts. On seeing "t = E", where E is one of the AvailExpr forms: let E' = apply tenv substitution to E @@ -3902,8 +4079,8 @@ static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd ) might invalidate some of the expressions in aenv. 
So there is an invalidate-bindings check for each statement seen. */ - for (i = 0; i < bb->stmts_used; i++) { - st = bb->stmts[i]; + for (UInt i = 0; i < stmts->stmts_used; i++) { + IRStmt* st = stmts->stmts[i]; /* ------ BEGIN invalidate aenv bindings ------ */ /* This is critical: remove from aenv any E' -> .. bindings @@ -3925,8 +4102,17 @@ static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd ) case Ist_NoOp: case Ist_IMark: case Ist_AbiHint: case Ist_WrTmp: case Ist_Exit: case Ist_LoadG: paranoia = 0; break; + case Ist_IfThenElse: { + IRIfThenElse* ite = st->Ist.IfThenElse.details; + anyDone |= do_cse_IRStmtVec(tyenv, ite->then_leg, + allowLoadsToBeCSEd); + anyDone |= do_cse_IRStmtVec(tyenv, ite->else_leg, + allowLoadsToBeCSEd); + paranoia = 0; + break; + } default: - vpanic("do_cse_BB(1)"); + vpanic("do_cse_IRStmtVec(1)"); } if (paranoia > 0) { @@ -3954,7 +4140,7 @@ static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd ) ae->u.GetIt.descr, IRExpr_RdTmp(ae->u.GetIt.ix), st->Ist.Put.offset, - typeOfIRExpr(bb->tyenv,st->Ist.Put.data) + typeOfIRExpr(tyenv, st->Ist.Put.data) ) != NoAlias) invalidate = True; } @@ -3972,7 +4158,7 @@ static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd ) invalidate = True; } else - vpanic("do_cse_BB(2)"); + vpanic("do_cse_IRStmtVec(2)"); } if (invalidate) { @@ -3988,7 +4174,7 @@ static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd ) if (st->tag != Ist_WrTmp) continue; - t = st->Ist.WrTmp.tmp; + IRTemp t = st->Ist.WrTmp.tmp; eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd); /* ignore if not of AvailExpr form */ if (!eprime) @@ -4008,18 +4194,34 @@ static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd ) /* A binding E' -> q was found. Replace stmt by "t = q" and note the t->q binding in tenv. 
*/ /* (this is the core of the CSE action) */ - q = (IRTemp)aenv->val[j]; - bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) ); - addToHHW( tenv, (HWord)t, (HWord)q ); + IRTemp q = (IRTemp) aenv->val[j]; + stmts->stmts[i] = IRStmt_WrTmp(t, IRExpr_RdTmp(q)); + addToHHW(tenv, (HWord) t, (HWord) q); anyDone = True; } else { /* No binding was found, so instead we add E' -> t to our collection of available expressions, replace this stmt with "t = E'", and move on. */ - bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) ); - addToHHW( aenv, (HWord)eprime, (HWord)t ); + stmts->stmts[i] = IRStmt_WrTmp(t, availExpr_to_IRExpr(eprime)); + addToHHW(aenv, (HWord) eprime, (HWord) t); } } + return anyDone; +} + +/* The BB is modified in-place. Returns True if any changes were + made. The caller can choose whether or not loads should be CSEd. + In the normal course of things we don't do that, since CSEing loads + is something of a dodgy proposition if the guest program is doing + some screwy stuff to do with races and spinloops. */ + +static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd ) +{ + vassert(sizeof(IRTemp) <= sizeof(HWord)); + + if (0) { ppIRSB(bb); vex_printf("\n\n"); } + + Bool anyDone = do_cse_IRStmtVec(bb->tyenv, bb->stmts, allowLoadsToBeCSEd); /* ppIRSB(bb); @@ -4062,13 +4264,11 @@ static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 ) other tmp2. Scan backwards from the specified start point -- an optimisation. */ -static Bool collapseChain ( IRSB* bb, Int startHere, - IRTemp tmp, - IRTemp* tmp2, Int* i32 ) +static Bool collapseChain(IRStmtVec* stmts, Int startHere, + IRTemp tmp, IRTemp* tmp2, Int* i32) { Int j, ii; IRTemp vv; - IRStmt* st; IRExpr* e; /* the (var, con) pair contain the current 'representation' for @@ -4079,7 +4279,7 @@ static Bool collapseChain ( IRSB* bb, Int startHere, /* Scan backwards to see if tmp can be replaced by some other tmp +/- a constant. 
*/ for (j = startHere; j >= 0; j--) { - st = bb->stmts[j]; + IRStmt* st = stmts->stmts[j]; if (st->tag != Ist_WrTmp) continue; if (st->Ist.WrTmp.tmp != var) @@ -4106,14 +4306,13 @@ static Bool collapseChain ( IRSB* bb, Int startHere, /* ------- Main function for Add32/Sub32 chain collapsing ------ */ -static void collapse_AddSub_chains_BB ( IRSB* bb ) +static void collapse_AddSub_chains_IRStmtVec(IRStmtVec* stmts) { - IRStmt *st; IRTemp var, var2; - Int i, con, con2; + Int con, con2; - for (i = bb->stmts_used-1; i >= 0; i--) { - st = bb->stmts[i]; + for (Int i = stmts->stmts_used - 1; i >= 0; i--) { + IRStmt* st = stmts->stmts[i]; if (st->tag == Ist_NoOp) continue; @@ -4124,14 +4323,14 @@ static void collapse_AddSub_chains_BB ( IRSB* bb ) /* So e1 is of the form Add32(var,con) or Sub32(var,-con). Find out if var can be expressed as var2 + con2. */ - if (collapseChain(bb, i-1, var, &var2, &con2)) { + if (collapseChain(stmts, i - 1, var, &var2, &con2)) { if (DEBUG_IROPT) { vex_printf("replacing1 "); - ppIRStmt(st); + ppIRStmt(st, NULL, 0); vex_printf(" with "); } con2 += con; - bb->stmts[i] + stmts->stmts[i] = IRStmt_WrTmp( st->Ist.WrTmp.tmp, (con2 >= 0) @@ -4143,7 +4342,7 @@ static void collapse_AddSub_chains_BB ( IRSB* bb ) IRExpr_Const(IRConst_U32(-con2))) ); if (DEBUG_IROPT) { - ppIRStmt(bb->stmts[i]); + ppIRStmt(stmts->stmts[i], NULL, 0); vex_printf("\n"); } } @@ -4156,22 +4355,22 @@ static void collapse_AddSub_chains_BB ( IRSB* bb ) if (st->tag == Ist_WrTmp && st->Ist.WrTmp.data->tag == Iex_GetI && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp - && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix - ->Iex.RdTmp.tmp, &var2, &con2)) { + && collapseChain(stmts, i - 1, st->Ist.WrTmp.data->Iex.GetI.ix + ->Iex.RdTmp.tmp, &var2, &con2)) { if (DEBUG_IROPT) { vex_printf("replacing3 "); - ppIRStmt(st); + ppIRStmt(st, NULL, 0); vex_printf(" with "); } con2 += st->Ist.WrTmp.data->Iex.GetI.bias; - bb->stmts[i] + stmts->stmts[i] = IRStmt_WrTmp( st->Ist.WrTmp.tmp, 
IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr, IRExpr_RdTmp(var2), con2)); if (DEBUG_IROPT) { - ppIRStmt(bb->stmts[i]); + ppIRStmt(stmts->stmts[i], NULL, 0); vex_printf("\n"); } continue; @@ -4181,29 +4380,39 @@ static void collapse_AddSub_chains_BB ( IRSB* bb ) IRPutI *puti = st->Ist.PutI.details; if (st->tag == Ist_PutI && puti->ix->tag == Iex_RdTmp - && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp, - &var2, &con2)) { + && collapseChain(stmts, i-1, puti->ix->Iex.RdTmp.tmp, + &var2, &con2)) { if (DEBUG_IROPT) { vex_printf("replacing2 "); - ppIRStmt(st); + ppIRStmt(st, NULL, 0); vex_printf(" with "); } con2 += puti->bias; - bb->stmts[i] + stmts->stmts[i] = IRStmt_PutI(mkIRPutI(puti->descr, IRExpr_RdTmp(var2), con2, puti->data)); if (DEBUG_IROPT) { - ppIRStmt(bb->stmts[i]); + ppIRStmt(stmts->stmts[i], NULL, 0); vex_printf("\n"); } continue; } + if (st->tag == Ist_IfThenElse) { + IRIfThenElse* ite = st->Ist.IfThenElse.details; + collapse_AddSub_chains_IRStmtVec(ite->then_leg); + collapse_AddSub_chains_IRStmtVec(ite->else_leg); + } } /* for */ } +static void collapse_AddSub_chains_BB ( IRSB* bb ) +{ + collapse_AddSub_chains_IRStmtVec(bb->stmts); +} + /*----------------------------------------... [truncated message content] |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:26:36
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=448182d69df9dbe9a8cdad2019f6d85441f8cf0a commit 448182d69df9dbe9a8cdad2019f6d85441f8cf0a Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 06:20:57 2017 +0200 Implement support for If-Then-Else and Phi nodes into ir_defs.c. Diff: --- VEX/priv/ir_defs.c | 1024 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 784 insertions(+), 240 deletions(-) diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c index 8822800..f01eb40 100644 --- a/VEX/priv/ir_defs.c +++ b/VEX/priv/ir_defs.c @@ -33,6 +33,8 @@ without prior written permission. */ +/* Copyright (C) 2017-2017 Ivo Raisr <iv...@iv...> */ + #include "libvex_basictypes.h" #include "libvex_ir.h" #include "libvex.h" @@ -114,7 +116,7 @@ void ppIRTemp ( IRTemp tmp ) if (tmp == IRTemp_INVALID) vex_printf("IRTemp_INVALID"); else - vex_printf( "t%u", tmp); + vex_printf("t%u", tmp); } void ppIROp ( IROp op ) @@ -1571,8 +1573,73 @@ void ppIRMBusEvent ( IRMBusEvent event ) } } -void ppIRStmt ( const IRStmt* s ) +void ppIRPhi(const IRPhi* phi) +{ + ppIRTemp(phi->dst); + vex_printf(" = phi("); + ppIRTemp(phi->srcThen); + vex_printf(","); + ppIRTemp(phi->srcElse); + vex_printf(")"); +} + +static void print_depth(UInt depth) { + for (UInt i = 0; i < depth; i++) { + vex_printf(" "); + } +} + +void ppIRPhiVec(const IRPhiVec* phis, UInt depth) +{ + for (UInt i = 0; i < phis->phis_used; i++) { + print_depth(depth); + ppIRPhi(phis->phis[i]); + if (i < phis->phis_used - 1) { + vex_printf("\n"); + } + } +} + +void ppIRTempDefSet(const IRTempDefSet* defset, UInt depth) +{ + ppIRTypeEnvDefd(NULL, defset, depth); +} + +void ppIRIfThenElse_Hint(IRIfThenElse_Hint hint) { + switch (hint) { + case IfThenElse_ThenLikely: vex_printf("IfThenElse_ThenLikely"); break; + case IfThenElse_ElseLikely: vex_printf("IfThenElse_ElseLikely"); break; + default: vpanic("ppIRIfThenElse_Hint"); + } +} + +void ppIRIfThenElseCondHint(const IRIfThenElse* ite) +{ + vex_printf("if ("); + 
ppIRExpr(ite->cond); + vex_printf(") ["); + ppIRIfThenElse_Hint(ite->hint); + vex_printf("]"); +} + +void ppIRIfThenElse(const IRIfThenElse* ite, const IRTypeEnv* tyenv, UInt depth) +{ + ppIRIfThenElseCondHint(ite); + vex_printf(" then {\n"); + ppIRStmtVec(ite->then_leg, tyenv, depth + 1); + print_depth(depth); + vex_printf("} else {\n"); + ppIRStmtVec(ite->else_leg, tyenv, depth + 1); + print_depth(depth); + vex_printf("}\n"); + ppIRPhiVec(ite->phi_nodes, depth); +} + +void ppIRStmt(const IRStmt* s, const IRTypeEnv* tyenv, UInt depth) +{ + print_depth(depth); + if (!s) { vex_printf("!!! IRStmt* which is NULL !!!"); return; @@ -1653,41 +1720,83 @@ void ppIRStmt ( const IRStmt* s ) ppIRJumpKind(s->Ist.Exit.jk); vex_printf(" } "); break; - default: + case Ist_IfThenElse: + ppIRIfThenElse(s->Ist.IfThenElse.details, tyenv, depth); + break; + default: vpanic("ppIRStmt"); } } -void ppIRTypeEnv ( const IRTypeEnv* env ) +void ppIRTypeEnv(const IRTypeEnv* env) { - UInt i; - for (i = 0; i < env->types_used; i++) { + for (UInt i = 0; i < env->used; i++) { if (i % 8 == 0) - vex_printf( " "); + print_depth(1); ppIRTemp(i); - vex_printf( ":"); + vex_printf("[%u]:", env->ids[i]); ppIRType(env->types[i]); if (i % 8 == 7) vex_printf( "\n"); else vex_printf( " "); } - if (env->types_used > 0 && env->types_used % 8 != 7) + if (env->used > 0 && env->used % 8 != 7) vex_printf( "\n"); } -void ppIRSB ( const IRSB* bb ) +void ppIRTypeEnvDefd(const IRTypeEnv* tyenv, const IRTempDefSet* defset, + UInt depth) { - Int i; - vex_printf("IRSB {\n"); - ppIRTypeEnv(bb->tyenv); + UInt tmps_printed = 0; + + for (UInt slot = 0; slot < defset->slots_used; slot++) { + UChar slot_value = defset->set[slot]; + for (UInt bit = 0; bit < sizeof(UChar); bit++) { + if (slot_value & (1 << bit)) { + if (tmps_printed % 8 == 0) + print_depth(depth); + + IRTemp tmp = slot * sizeof(UChar) + bit; + ppIRTemp(tmp); + if (tyenv != NULL) { + vex_printf(":"); + ppIRType(tyenv->types[tmp]); + } + + if (tmps_printed % 8 == 
7) { + vex_printf("\n"); + } else { + vex_printf(" "); + } + + tmps_printed += 1; + } + } + } + + if (tmps_printed > 0 && tmps_printed % 8 != 7) + vex_printf("\n"); +} + +void ppIRStmtVec(const IRStmtVec* stmts, const IRTypeEnv* tyenv, UInt depth) +{ + ppIRTypeEnvDefd(tyenv, stmts->defset, depth); vex_printf("\n"); - for (i = 0; i < bb->stmts_used; i++) { - vex_printf( " "); - ppIRStmt(bb->stmts[i]); - vex_printf( "\n"); + for (UInt i = 0; i < stmts->stmts_used; i++) { + ppIRStmt(stmts->stmts[i], tyenv, depth); + vex_printf("\n"); } - vex_printf( " PUT(%d) = ", bb->offsIP ); +} + +void ppIRSB ( const IRSB* bb ) +{ + UInt depth = 0; + + vex_printf("IRSB {\n"); + ppIRStmtVec(bb->stmts, bb->tyenv, depth + 1); + print_depth(depth + 1); + vex_printf("PUT(%d) = ", bb->offsIP); ppIRExpr( bb->next ); vex_printf( "; exit-"); ppIRJumpKind(bb->jumpkind); @@ -2132,6 +2241,49 @@ IRLoadG* mkIRLoadG ( IREndness end, IRLoadGOp cvt, return lg; } +/* Constructors -- IRIfThenElse */ + +IRPhi* mkIRPhi(IRTemp dst, IRTemp srcThen, IRTemp srcElse) +{ + IRPhi* phi = LibVEX_Alloc_inline(sizeof(IRPhi)); + phi->dst = dst; + phi->srcThen = srcThen; + phi->srcElse = srcElse; + return phi; +} + +IRPhiVec* emptyIRPhiVec(void) +{ + IRPhiVec* vec = LibVEX_Alloc_inline(sizeof(IRPhiVec)); + vec->phis_used = 0; + vec->phis_size = 8; + vec->phis = LibVEX_Alloc_inline(vec->phis_size * sizeof(IRPhi*)); + return vec; +} + +IRTempDefSet* emptyIRTempDefSet(void) +{ + IRTempDefSet* defset = LibVEX_Alloc_inline(sizeof(IRTempDefSet)); + defset->slots_used = 0; + defset->slots_size = 8 / sizeof(UChar); + vassert(defset->slots_size >= 1); + defset->set = LibVEX_Alloc_inline(defset->slots_size * sizeof(UChar)); + return defset; +} + +IRIfThenElse* mkIRIfThenElse(IRExpr* cond, IRIfThenElse_Hint hint, + IRStmtVec* then_leg, IRStmtVec* else_leg, + IRPhiVec* phi_nodes) +{ + IRIfThenElse* ite = LibVEX_Alloc_inline(sizeof(IRIfThenElse)); + ite->cond = cond; + ite->hint = hint; + ite->then_leg = then_leg; + 
ite->else_leg = else_leg; + ite->phi_nodes = phi_nodes; + return ite; +} + /* Constructors -- IRStmt */ @@ -2243,40 +2395,76 @@ IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst, return s; } +IRStmt* IRStmt_IfThenElse(IRExpr* cond, IRIfThenElse_Hint hint, + IRStmtVec* then_leg, IRStmtVec* else_leg, + IRPhiVec* phi_nodes) +{ + IRStmt* s = LibVEX_Alloc_inline(sizeof(IRStmt)); + s->tag = Ist_IfThenElse; + s->Ist.IfThenElse.details = mkIRIfThenElse(cond, hint, then_leg, else_leg, + phi_nodes); + return s; +} + /* Constructors -- IRTypeEnv */ -IRTypeEnv* emptyIRTypeEnv ( void ) +static IRTypeEnv* emptyIRTypeEnv ( void ) { IRTypeEnv* env = LibVEX_Alloc_inline(sizeof(IRTypeEnv)); env->types = LibVEX_Alloc_inline(8 * sizeof(IRType)); - env->types_size = 8; - env->types_used = 0; + env->ids = LibVEX_Alloc_inline(8 * sizeof(IRStmtVecID)); + env->size = 8; + env->used = 0; return env; } +/* Constructors -- IRStmtVec */ + +IRStmtVec* emptyIRStmtVec(void) +{ + IRStmtVec* stmts = LibVEX_Alloc_inline(sizeof(IRStmtVec)); + stmts->stmts_used = 0; + stmts->stmts_size = 8; + stmts->stmts = LibVEX_Alloc_inline(stmts->stmts_size * sizeof(IRStmt*)); + stmts->id = IRStmtVecID_INVALID; + stmts->parent = NULL; + stmts->defset = emptyIRTempDefSet(); + return stmts; +} + + /* Constructors -- IRSB */ IRSB* emptyIRSB ( void ) { - IRSB* bb = LibVEX_Alloc_inline(sizeof(IRSB)); - bb->tyenv = emptyIRTypeEnv(); - bb->stmts_used = 0; - bb->stmts_size = 8; - bb->stmts = LibVEX_Alloc_inline(bb->stmts_size * sizeof(IRStmt*)); - bb->next = NULL; - bb->jumpkind = Ijk_Boring; - bb->offsIP = 0; + IRSB* bb = LibVEX_Alloc_inline(sizeof(IRSB)); + bb->tyenv = emptyIRTypeEnv(); + bb->stmts = emptyIRStmtVec(); + bb->id_seq = 0; + bb->next = NULL; + bb->jumpkind = Ijk_Boring; + bb->offsIP = 0; + + bb->stmts->id = nextIRStmtVecID(bb); return bb; } +IRStmtVecID nextIRStmtVecID(IRSB* irsb) +{ + IRStmtVecID next = irsb->id_seq; + irsb->id_seq += 1; + vassert(irsb->id_seq != IRStmtVecID_INVALID); + 
return next; +} -/*---------------------------------------------------------------*/ -/*--- (Deep) copy constructors. These make complete copies ---*/ -/*--- the original, which can be modified without affecting ---*/ -/*--- the original. ---*/ -/*---------------------------------------------------------------*/ + +/*----------------------------------------------------------------*/ +/*--- (Deep) copy constructors. These make complete copies ---*/ +/*--- of the original, which can be modified without affecting ---*/ +/*--- the original. ---*/ +/*----------------------------------------------------------------*/ /* Copying IR Expr vectors (for call args). */ @@ -2441,7 +2629,35 @@ IRPutI* deepCopyIRPutI ( const IRPutI * puti ) deepCopyIRExpr(puti->data)); } -IRStmt* deepCopyIRStmt ( const IRStmt* s ) +IRPhi* deepCopyIRPhi(const IRPhi* phi) +{ + return mkIRPhi(phi->dst, phi->srcThen, phi->srcElse); +} + +IRPhiVec* deepCopyIRPhiVec(const IRPhiVec* vec) +{ + IRPhiVec* vec2 = LibVEX_Alloc_inline(sizeof(IRPhiVec)); + vec2->phis_used = vec2->phis_size = vec->phis_used; + IRPhi **phis2 = LibVEX_Alloc_inline(vec2->phis_used * sizeof(IRPhi*)); + for (UInt i = 0; i < vec2->phis_used; i++) + phis2[i] = deepCopyIRPhi(vec->phis[i]); + vec2->phis = phis2; + return vec2; +} + +IRTempDefSet* deepCopyIRTempDefSet(const IRTempDefSet* defset) +{ + IRTempDefSet* defset2 = LibVEX_Alloc_inline(sizeof(IRTempDefSet)); + defset2->slots_used = defset2->slots_size = defset->slots_used; + UChar* set2 = LibVEX_Alloc_inline(defset2->slots_used * sizeof(UChar)); + for (UInt i = 0; i < defset2->slots_used; i++) { + set2[i] = defset->set[i]; + } + defset2->set = set2; + return defset2; +} + +IRStmt* deepCopyIRStmt(const IRStmt* s, IRStmtVec* parent) { switch (s->tag) { case Ist_NoOp: @@ -2498,33 +2714,51 @@ IRStmt* deepCopyIRStmt ( const IRStmt* s ) s->Ist.Exit.jk, deepCopyIRConst(s->Ist.Exit.dst), s->Ist.Exit.offsIP); + case Ist_IfThenElse: { + const IRIfThenElse* ite = s->Ist.IfThenElse.details; 
+ return IRStmt_IfThenElse(deepCopyIRExpr(ite->cond), ite->hint, + deepCopyIRStmtVec(ite->then_leg, parent), + deepCopyIRStmtVec(ite->else_leg, parent), + deepCopyIRPhiVec(ite->phi_nodes)); + } default: vpanic("deepCopyIRStmt"); } } -IRTypeEnv* deepCopyIRTypeEnv ( const IRTypeEnv* src ) +IRStmtVec* deepCopyIRStmtVec(const IRStmtVec* src, IRStmtVec* parent) +{ + IRStmtVec* vec2 = LibVEX_Alloc_inline(sizeof(IRStmtVec)); + vec2->id = src->id; + vec2->parent = parent; + vec2->defset = deepCopyIRTempDefSet(src->defset); + vec2->stmts_used = vec2->stmts_size = src->stmts_used; + IRStmt **stmts2 = LibVEX_Alloc_inline(vec2->stmts_used * sizeof(IRStmt*)); + for (UInt i = 0; i < vec2->stmts_used; i++) { + stmts2[i] = deepCopyIRStmt(src->stmts[i], vec2); + } + vec2->stmts = stmts2; + return vec2; +} + +IRTypeEnv* deepCopyIRTypeEnv(const IRTypeEnv* src) { - Int i; IRTypeEnv* dst = LibVEX_Alloc_inline(sizeof(IRTypeEnv)); - dst->types_size = src->types_size; - dst->types_used = src->types_used; - dst->types = LibVEX_Alloc_inline(dst->types_size * sizeof(IRType)); - for (i = 0; i < src->types_used; i++) + dst->size = src->size; + dst->used = src->used; + dst->types = LibVEX_Alloc_inline(dst->size * sizeof(IRType)); + dst->ids = LibVEX_Alloc_inline(dst->size * sizeof(IRStmtVecID)); + for (UInt i = 0; i < src->used; i++) { dst->types[i] = src->types[i]; + dst->ids[i] = src->ids[i]; + } return dst; } IRSB* deepCopyIRSB ( const IRSB* bb ) { - Int i; - IRStmt** sts2; IRSB* bb2 = deepCopyIRSBExceptStmts(bb); - bb2->stmts_used = bb2->stmts_size = bb->stmts_used; - sts2 = LibVEX_Alloc_inline(bb2->stmts_used * sizeof(IRStmt*)); - for (i = 0; i < bb2->stmts_used; i++) - sts2[i] = deepCopyIRStmt(bb->stmts[i]); - bb2->stmts = sts2; + bb2->stmts = deepCopyIRStmtVec(bb->stmts, NULL); return bb2; } @@ -2532,6 +2766,7 @@ IRSB* deepCopyIRSBExceptStmts ( const IRSB* bb ) { IRSB* bb2 = emptyIRSB(); bb2->tyenv = deepCopyIRTypeEnv(bb->tyenv); + bb2->id_seq = bb->id_seq; bb2->next = 
deepCopyIRExpr(bb->next); bb2->jumpkind = bb->jumpkind; bb2->offsIP = bb->offsIP; @@ -3550,53 +3785,147 @@ void typeOfPrimop ( IROp op, /*---------------------------------------------------------------*/ +/*--- Helper functions for the IR -- IR Phi Nodes ---*/ +/*---------------------------------------------------------------*/ + +void addIRPhiToIRPhiVec(IRPhiVec* phi_nodes, IRPhi* phi) +{ + if (phi_nodes->phis_used == phi_nodes->phis_size) { + IRPhi** phis2 + = LibVEX_Alloc_inline(2 * phi_nodes->phis_size * sizeof(IRPhi*)); + for (UInt i = 0; i < phi_nodes->phis_size; i++) + phis2[i] = phi_nodes->phis[i]; + phi_nodes->phis = phis2; + phi_nodes->phis_size *= 2; + } + + vassert(phi_nodes->phis_used < phi_nodes->phis_size); + phi_nodes->phis[phi_nodes->phis_used] = phi; + phi_nodes->phis_used += 1; +} + + +/*---------------------------------------------------------------*/ +/*--- Helper functions for the IR -- IR Temp Defined Set ---*/ +/*---------------------------------------------------------------*/ + +void setIRTempDefined(IRTempDefSet* defset, IRTemp tmp) +{ + UInt slots_required = (tmp + sizeof(UChar)) / sizeof(UChar); + + if (slots_required >= defset->slots_size) { + UInt new_size = (slots_required > 2 * defset->slots_size) ? 
+ slots_required : 2 * defset->slots_size; + UChar* new_set = LibVEX_Alloc_inline(new_size * sizeof(UChar)); + for (UInt i = 0; i < defset->slots_used; i++) { + new_set[i] = defset->set[i]; + } + defset->set = new_set; + defset->slots_size = new_size; + } + + if (slots_required > defset->slots_used) { + for (UInt i = defset->slots_used; i < slots_required; i++) { + defset->set[i] = 0; + } + defset->slots_used = slots_required; + } + + vassert(!isIRTempDefined(defset, tmp)); + + UInt mask = (1 << (tmp % sizeof(UChar))); + defset->set[tmp / sizeof(UChar)] |= mask; +} + +void clearIRTempDefSet(IRTempDefSet* defset) +{ + for (UInt i = 0; i < defset->slots_used; i++) { + defset->set[i] = 0; + } +} + +/*---------------------------------------------------------------*/ /*--- Helper functions for the IR -- IR Basic Blocks ---*/ /*---------------------------------------------------------------*/ -void addStmtToIRSB ( IRSB* bb, IRStmt* st ) +void addStmtToIRStmtVec(IRStmtVec* stmts, IRStmt* st) { - Int i; - if (bb->stmts_used == bb->stmts_size) { - IRStmt** stmts2 = LibVEX_Alloc_inline(2 * bb->stmts_size * sizeof(IRStmt*)); - for (i = 0; i < bb->stmts_size; i++) - stmts2[i] = bb->stmts[i]; - bb->stmts = stmts2; - bb->stmts_size *= 2; + if (stmts->stmts_used == stmts->stmts_size) { + IRStmt** stmts2 + = LibVEX_Alloc_inline(2 * stmts->stmts_size * sizeof(IRStmt*)); + for (UInt i = 0; i < stmts->stmts_size; i++) + stmts2[i] = stmts->stmts[i]; + stmts->stmts = stmts2; + stmts->stmts_size *= 2; } - vassert(bb->stmts_used < bb->stmts_size); - bb->stmts[bb->stmts_used] = st; - bb->stmts_used++; + vassert(stmts->stmts_used < stmts->stmts_size); + stmts->stmts[stmts->stmts_used] = st; + stmts->stmts_used++; +} + +void addStmtToIRSB ( IRSB* bb, IRStmt* st ) +{ + addStmtToIRStmtVec(bb->stmts, st); } +IRStmt *addEmptyIfThenElse(IRSB* bb, IRStmtVec* parent, IRExpr* cond, + IRIfThenElse_Hint hint) +{ + IRStmtVec* then_leg = emptyIRStmtVec(); + then_leg->id = nextIRStmtVecID(bb); + 
then_leg->parent = parent; + + IRStmtVec* else_leg = emptyIRStmtVec(); + else_leg->id = nextIRStmtVecID(bb); + else_leg->parent = parent; + + IRStmt* st = IRStmt_IfThenElse(cond, hint, then_leg, else_leg, + emptyIRPhiVec()); + addStmtToIRStmtVec(parent, st); + return st; +} /*---------------------------------------------------------------*/ /*--- Helper functions for the IR -- IR Type Environments ---*/ /*---------------------------------------------------------------*/ -/* Allocate a new IRTemp, given its type. */ - -IRTemp newIRTemp ( IRTypeEnv* env, IRType ty ) +void ensureSpaceInIRTypeEnv(IRTypeEnv* env, UInt new_size) { - vassert(env); - vassert(env->types_used >= 0); - vassert(env->types_size >= 0); - vassert(env->types_used <= env->types_size); - if (env->types_used < env->types_size) { - env->types[env->types_used] = ty; - return env->types_used++; - } else { - Int i; - Int new_size = env->types_size==0 ? 8 : 2*env->types_size; - IRType* new_types - = LibVEX_Alloc_inline(new_size * sizeof(IRType)); - for (i = 0; i < env->types_used; i++) + vassert(env != NULL); + + if (new_size > env->size) { + IRType* new_types = LibVEX_Alloc_inline(new_size * sizeof(IRType)); + IRStmtVecID* new_ids = LibVEX_Alloc_inline(new_size * sizeof(IRStmtVecID)); + for (UInt i = 0; i < env->used; i++) { new_types[i] = env->types[i]; - env->types = new_types; - env->types_size = new_size; - return newIRTemp(env, ty); + new_ids[i] = env->ids[i]; + } + env->types = new_types; + env->ids = new_ids; + env->size = new_size; } } +IRTemp newIRTemp(IRTypeEnv* env, IRStmtVec* stmts, IRType ty) +{ + vassert(env != NULL); + vassert(stmts != NULL); + vassert(env->used >= 0); + vassert(env->size >= 0); + vassert(env->used <= env->size); + + if (env->used == env->size) { + ensureSpaceInIRTypeEnv(env, 2 * env->size); + } + + IRTemp tmp = env->used; + env->used += 1; + env->types[tmp] = ty; + env->ids[tmp] = stmts->id; + setIRTempDefined(stmts->defset, tmp); + return tmp; +} + 
/*---------------------------------------------------------------*/ /*--- Helper functions for the IR -- finding types of exprs ---*/ @@ -3606,7 +3935,7 @@ inline IRType typeOfIRTemp ( const IRTypeEnv* env, IRTemp tmp ) { vassert(tmp >= 0); - vassert(tmp < env->types_used); + vassert(tmp < env->used); return env->types[tmp]; } @@ -3736,6 +4065,9 @@ static inline Bool isIRAtom_or_VECRET_or_GSPTR ( const IRExpr* e ) return UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e)); } +static Bool isFlatIRStmtVec(const IRStmtVec* stmts); + +static Bool isFlatIRStmt ( const IRStmt* st ) { Int i; @@ -3835,11 +4167,30 @@ Bool isFlatIRStmt ( const IRStmt* st ) return True; case Ist_Exit: return isIRAtom(st->Ist.Exit.guard); + case Ist_IfThenElse: + return isIRAtom(st->Ist.IfThenElse.details->cond) + && isFlatIRStmtVec(st->Ist.IfThenElse.details->then_leg) + && isFlatIRStmtVec(st->Ist.IfThenElse.details->else_leg); default: vpanic("isFlatIRStmt(st)"); } } +static +Bool isFlatIRStmtVec(const IRStmtVec* stmts) +{ + for (UInt i = 0; i < stmts->stmts_used; i++) { + if (!isFlatIRStmt(stmts->stmts[i])) + return False; + } + + return True; +} + +Bool isFlatIRSB(const IRSB* irsb) +{ + return isFlatIRStmtVec(irsb->stmts); +} /*---------------------------------------------------------------*/ /*--- Sanity checking ---*/ @@ -3852,6 +4203,10 @@ Bool isFlatIRStmt ( const IRStmt* st ) bit expression, depending on the guest's word size. Each temp is assigned only once, before its uses. + Each temp assigned and referenced is in scope. + + Phi functions refer to existing, already assigned temporaries from + [parent, then leg, else leg]. 
*/ static inline Int countArgs ( IRExpr** args ) @@ -3870,7 +4225,7 @@ void sanityCheckFail ( const IRSB* bb, const IRStmt* stmt, const HChar* what ) ppIRSB(bb); if (stmt) { vex_printf("\nIN STATEMENT:\n\n"); - ppIRStmt(stmt); + ppIRStmt(stmt, bb->tyenv, 1); } vex_printf("\n\nERROR = %s\n\n", what ); vpanic("sanityCheckFail: exiting due to bad IR"); @@ -3910,29 +4265,62 @@ static Bool saneIRConst ( const IRConst* con ) } } -/* Traverse a Stmt/Expr, inspecting IRTemp uses. Report any out of - range ones. Report any which are read and for which the current - def_count is zero. */ +/* Traverse a Stmt/Expr, inspecting IRTemp uses. Report any out of range or out + of scope ones. Report any which are read and for which the current + def_count is zero. Report any which are assigned more than once or assigned + after being used. */ + +static Bool inRangeIRTemp(const IRTypeEnv* tyenv, IRTemp tmp) +{ + if (tmp >= 0 || tmp < tyenv->used) { + return True; + } + return False; +} + +static Bool inScopeIRTemp(const IRTypeEnv* tyenv, const IRStmtVec* stmts, + IRTemp tmp) +{ + IRStmtVecID id = tyenv->ids[tmp]; + vassert(id != IRStmtVecID_INVALID); + + while (!(isIRTempDefined(stmts->defset, tmp))) { + stmts = stmts->parent; + if (stmts == NULL) + return False; + } + + return True; +} static -void useBeforeDef_Temp ( const IRSB* bb, const IRStmt* stmt, IRTemp tmp, - Int* def_counts ) +void useBeforeDef_Temp(const IRSB* bb, const IRStmtVec* stmts, + const IRStmt* stmt, IRTemp tmp, UInt def_counts[]) { - if (tmp < 0 || tmp >= bb->tyenv->types_used) - sanityCheckFail(bb,stmt, "out of range Temp in IRExpr"); + const IRTypeEnv* tyenv = bb->tyenv; + + if (!inRangeIRTemp(tyenv, tmp)) + sanityCheckFail(bb, stmt, "out of range Temp in IRExpr"); + if (!inScopeIRTemp(tyenv, stmts, tmp)) + sanityCheckFail(bb, stmt, "out of scope Temp in IRExpr"); + if (def_counts[tmp] < 1) - sanityCheckFail(bb,stmt, "IRTemp use before def in IRExpr"); + sanityCheckFail(bb, stmt, "IRTemp use before def in 
IRExpr"); } static -void assignedOnce_Temp(const IRSB *bb, const IRStmt *stmt, IRTemp tmp, - Int *def_counts, UInt n_def_counts, - const HChar *err_msg_out_of_range, - const HChar *err_msg_assigned_more_than_once) +void assignedOnce_Temp(const IRSB* bb, const IRStmtVec* stmts, + const IRStmt* stmt, IRTemp tmp, UInt def_counts[], + const HChar* err_msg_out_of_range, + const HChar* err_msg_out_of_scope, + const HChar* err_msg_assigned_more_than_once) { - if (tmp < 0 || tmp >= n_def_counts) { + const IRTypeEnv* tyenv = bb->tyenv; + + if (!inRangeIRTemp(tyenv, tmp)) sanityCheckFail(bb, stmt, err_msg_out_of_range); - } + if (!inScopeIRTemp(tyenv, stmts, tmp)) + sanityCheckFail(bb, stmt, err_msg_out_of_scope); def_counts[tmp]++; if (def_counts[tmp] > 1) { @@ -3941,43 +4329,44 @@ void assignedOnce_Temp(const IRSB *bb, const IRStmt *stmt, IRTemp tmp, } static -void useBeforeDef_Expr ( const IRSB* bb, const IRStmt* stmt, - const IRExpr* expr, Int* def_counts ) +void useBeforeDef_Expr(const IRSB *bb, const IRStmtVec* stmts, + const IRStmt* stmt, const IRExpr* expr, + UInt def_counts[]) { Int i; switch (expr->tag) { case Iex_Get: break; case Iex_GetI: - useBeforeDef_Expr(bb,stmt,expr->Iex.GetI.ix,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, expr->Iex.GetI.ix, def_counts); break; case Iex_RdTmp: - useBeforeDef_Temp(bb,stmt,expr->Iex.RdTmp.tmp,def_counts); + useBeforeDef_Temp(bb, stmts, stmt, expr->Iex.RdTmp.tmp, def_counts); break; case Iex_Qop: { const IRQop* qop = expr->Iex.Qop.details; - useBeforeDef_Expr(bb,stmt,qop->arg1,def_counts); - useBeforeDef_Expr(bb,stmt,qop->arg2,def_counts); - useBeforeDef_Expr(bb,stmt,qop->arg3,def_counts); - useBeforeDef_Expr(bb,stmt,qop->arg4,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, qop->arg1, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, qop->arg2, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, qop->arg3, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, qop->arg4, def_counts); break; } case Iex_Triop: { const IRTriop* 
triop = expr->Iex.Triop.details; - useBeforeDef_Expr(bb,stmt,triop->arg1,def_counts); - useBeforeDef_Expr(bb,stmt,triop->arg2,def_counts); - useBeforeDef_Expr(bb,stmt,triop->arg3,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, triop->arg1, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, triop->arg2, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, triop->arg3, def_counts); break; } case Iex_Binop: - useBeforeDef_Expr(bb,stmt,expr->Iex.Binop.arg1,def_counts); - useBeforeDef_Expr(bb,stmt,expr->Iex.Binop.arg2,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, expr->Iex.Binop.arg1, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, expr->Iex.Binop.arg2, def_counts); break; case Iex_Unop: - useBeforeDef_Expr(bb,stmt,expr->Iex.Unop.arg,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, expr->Iex.Unop.arg, def_counts); break; case Iex_Load: - useBeforeDef_Expr(bb,stmt,expr->Iex.Load.addr,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, expr->Iex.Load.addr, def_counts); break; case Iex_Const: break; @@ -3988,24 +4377,55 @@ void useBeforeDef_Expr ( const IRSB* bb, const IRStmt* stmt, /* These aren't allowed in CCall lists. Let's detect and throw them out here, though, rather than segfaulting a bit later on. 
*/ - sanityCheckFail(bb,stmt, "IRExprP__* value in CCall arg list"); + sanityCheckFail(bb,stmt, + "IRExprP__* value in CCall arg list"); } else { - useBeforeDef_Expr(bb,stmt,arg,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, arg, def_counts); } } break; case Iex_ITE: - useBeforeDef_Expr(bb,stmt,expr->Iex.ITE.cond,def_counts); - useBeforeDef_Expr(bb,stmt,expr->Iex.ITE.iftrue,def_counts); - useBeforeDef_Expr(bb,stmt,expr->Iex.ITE.iffalse,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, expr->Iex.ITE.cond, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, expr->Iex.ITE.iftrue, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, expr->Iex.ITE.iffalse, def_counts); break; default: vpanic("useBeforeDef_Expr"); } } +static void useBeforeDef_IRPhi(const IRSB* bb, const IRStmtVec* stmts, + const IRStmt* stmt, const IRPhi* phi, + UInt def_counts[]) +{ + vassert(stmt->tag == Ist_IfThenElse); + + IRStmtVec* then_leg = stmt->Ist.IfThenElse.details->then_leg; + IRStmtVec* else_leg = stmt->Ist.IfThenElse.details->else_leg; + + useBeforeDef_Temp(bb, then_leg, stmt, phi->srcThen, def_counts); + useBeforeDef_Temp(bb, else_leg, stmt, phi->srcElse, def_counts); + + /* Check also that referenced IRStmtVec's actually exist and belong to + "parent", "then", and "else", respectively. 
*/ + const IRTypeEnv* tyenv = bb->tyenv; + if (tyenv->ids[phi->dst] != stmts->id) { + sanityCheckFail(bb, stmt, "Istmt.IfThenElse.Phi.dst does not " + "reference parent IRStmtVec"); + } + if (tyenv->ids[phi->srcThen] != then_leg->id) { + sanityCheckFail(bb, stmt, "Istmt.IfThenElse.Phi.srcThen does not " + "reference \"then\" IRStmtVec leg"); + } + if (tyenv->ids[phi->srcElse] != else_leg->id) { + sanityCheckFail(bb, stmt, "Istmt.IfThenElse.Phi.srcElse does not " + "reference \"else\" IRStmtVec leg"); + } +} + static -void useBeforeDef_Stmt ( const IRSB* bb, const IRStmt* stmt, Int* def_counts ) +void useBeforeDef_Stmt(const IRSB* bb, const IRStmtVec* stmts, + const IRStmt* stmt, UInt def_counts[]) { Int i; const IRDirty* d; @@ -4017,50 +4437,51 @@ void useBeforeDef_Stmt ( const IRSB* bb, const IRStmt* stmt, Int* def_counts ) case Ist_IMark: break; case Ist_AbiHint: - useBeforeDef_Expr(bb,stmt,stmt->Ist.AbiHint.base,def_counts); - useBeforeDef_Expr(bb,stmt,stmt->Ist.AbiHint.nia,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, stmt->Ist.AbiHint.base, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, stmt->Ist.AbiHint.nia, def_counts); break; case Ist_Put: - useBeforeDef_Expr(bb,stmt,stmt->Ist.Put.data,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, stmt->Ist.Put.data, def_counts); break; case Ist_PutI: puti = stmt->Ist.PutI.details; - useBeforeDef_Expr(bb,stmt,puti->ix,def_counts); - useBeforeDef_Expr(bb,stmt,puti->data,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, puti->ix, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, puti->data, def_counts); break; case Ist_WrTmp: - useBeforeDef_Expr(bb,stmt,stmt->Ist.WrTmp.data,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, stmt->Ist.WrTmp.data, def_counts); break; case Ist_Store: - useBeforeDef_Expr(bb,stmt,stmt->Ist.Store.addr,def_counts); - useBeforeDef_Expr(bb,stmt,stmt->Ist.Store.data,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, stmt->Ist.Store.addr, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, 
stmt->Ist.Store.data, def_counts); break; case Ist_StoreG: sg = stmt->Ist.StoreG.details; - useBeforeDef_Expr(bb,stmt,sg->addr,def_counts); - useBeforeDef_Expr(bb,stmt,sg->data,def_counts); - useBeforeDef_Expr(bb,stmt,sg->guard,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, sg->addr, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, sg->data, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, sg->guard, def_counts); break; case Ist_LoadG: lg = stmt->Ist.LoadG.details; - useBeforeDef_Expr(bb,stmt,lg->addr,def_counts); - useBeforeDef_Expr(bb,stmt,lg->alt,def_counts); - useBeforeDef_Expr(bb,stmt,lg->guard,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, lg->addr, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, lg->alt, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, lg->guard, def_counts); break; case Ist_CAS: cas = stmt->Ist.CAS.details; - useBeforeDef_Expr(bb,stmt,cas->addr,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, cas->addr, def_counts); if (cas->expdHi) - useBeforeDef_Expr(bb,stmt,cas->expdHi,def_counts); - useBeforeDef_Expr(bb,stmt,cas->expdLo,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, cas->expdHi, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, cas->expdLo, def_counts); if (cas->dataHi) - useBeforeDef_Expr(bb,stmt,cas->dataHi,def_counts); - useBeforeDef_Expr(bb,stmt,cas->dataLo,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, cas->dataHi, def_counts); + useBeforeDef_Expr(bb, stmts, stmt, cas->dataLo, def_counts); break; case Ist_LLSC: - useBeforeDef_Expr(bb,stmt,stmt->Ist.LLSC.addr,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, stmt->Ist.LLSC.addr, def_counts); if (stmt->Ist.LLSC.storedata != NULL) - useBeforeDef_Expr(bb,stmt,stmt->Ist.LLSC.storedata,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, stmt->Ist.LLSC.storedata, + def_counts); break; case Ist_Dirty: d = stmt->Ist.Dirty.details; @@ -4070,17 +4491,23 @@ void useBeforeDef_Stmt ( const IRSB* bb, const IRStmt* stmt, Int* def_counts ) /* This is ensured by isFlatIRStmt */ ; } 
else { - useBeforeDef_Expr(bb,stmt,arg,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, arg, def_counts); } } if (d->mFx != Ifx_None) - useBeforeDef_Expr(bb,stmt,d->mAddr,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, d->mAddr, def_counts); break; case Ist_NoOp: case Ist_MBE: break; case Ist_Exit: - useBeforeDef_Expr(bb,stmt,stmt->Ist.Exit.guard,def_counts); + useBeforeDef_Expr(bb, stmts, stmt, stmt->Ist.Exit.guard, def_counts); + break; + case Ist_IfThenElse: + useBeforeDef_Expr(bb, stmts, stmt, stmt->Ist.IfThenElse.details->cond, + def_counts); + /* Traversing into legs and phi nodes driven from + sanityCheckIRStmtVec(). */ break; default: vpanic("useBeforeDef_Stmt"); @@ -4088,48 +4515,59 @@ void useBeforeDef_Stmt ( const IRSB* bb, const IRStmt* stmt, Int* def_counts ) } static -void assignedOnce_Stmt(const IRSB *bb, const IRStmt *stmt, - Int *def_counts, UInt n_def_counts) +void assignedOnce_Stmt(const IRSB* bb, const IRStmtVec* stmts, + const IRStmt* stmt, UInt def_counts[]) { switch (stmt->tag) { case Ist_WrTmp: assignedOnce_Temp( - bb, stmt, stmt->Ist.WrTmp.tmp, def_counts, n_def_counts, + bb, stmts, stmt, stmt->Ist.WrTmp.tmp, def_counts, "IRStmt.Tmp: destination tmp is out of range", + "IRStmt.Tmp: destination tmp is out of scope", "IRStmt.Tmp: destination tmp is assigned more than once"); break; case Ist_LoadG: assignedOnce_Temp( - bb, stmt, stmt->Ist.LoadG.details->dst, def_counts, n_def_counts, + bb, stmts, stmt, stmt->Ist.LoadG.details->dst, def_counts, "IRStmt.LoadG: destination tmp is out of range", + "IRStmt.LoadG: destination tmp is out of scope", "IRStmt.LoadG: destination tmp is assigned more than once"); break; case Ist_Dirty: if (stmt->Ist.Dirty.details->tmp != IRTemp_INVALID) { assignedOnce_Temp( - bb, stmt, stmt->Ist.Dirty.details->tmp, def_counts, n_def_counts, + bb, stmts, stmt, stmt->Ist.Dirty.details->tmp, def_counts, "IRStmt.Dirty: destination tmp is out of range", + "IRStmt.Dirty: destination tmp is out of scope", "IRStmt.Dirty: 
destination tmp is assigned more than once"); } break; case Ist_CAS: if (stmt->Ist.CAS.details->oldHi != IRTemp_INVALID) { assignedOnce_Temp( - bb, stmt, stmt->Ist.CAS.details->oldHi, def_counts, n_def_counts, + bb, stmts, stmt, stmt->Ist.CAS.details->oldHi, def_counts, "IRStmt.CAS: destination tmpHi is out of range", + "IRStmt.CAS: destination tmpHi is out of scope", "IRStmt.CAS: destination tmpHi is assigned more than once"); } assignedOnce_Temp( - bb, stmt, stmt->Ist.CAS.details->oldLo, def_counts, n_def_counts, + bb, stmts, stmt, stmt->Ist.CAS.details->oldLo, def_counts, "IRStmt.CAS: destination tmpLo is out of range", + "IRStmt.CAS: destination tmpLo is out of scope", "IRStmt.CAS: destination tmpLo is assigned more than once"); break; case Ist_LLSC: assignedOnce_Temp( - bb, stmt, stmt->Ist.LLSC.result, def_counts, n_def_counts, + bb, stmts, stmt, stmt->Ist.LLSC.result, def_counts, "IRStmt.LLSC: destination tmp is out of range", + "IRStmt.LLSC: destination tmp is out of scope", "IRStmt.LLSC: destination tmp is assigned more than once"); break; + case Ist_IfThenElse: { + /* Traversing into legs and phi nodes driven from + sanityCheckIRStmtVec(). 
*/ + break; + } // Ignore all other cases case Ist_NoOp: case Ist_IMark: case Ist_AbiHint: case Ist_Put: case Ist_PutI: case Ist_Store: case Ist_StoreG: case Ist_MBE: case Ist_Exit: @@ -4139,9 +4577,19 @@ void assignedOnce_Stmt(const IRSB *bb, const IRStmt *stmt, } } +static void assignedOnce_IRPhi(const IRSB* bb, const IRStmtVec* stmts, + const IRStmt* stmt, const IRPhi* phi, + UInt def_counts[]) +{ + assignedOnce_Temp(bb, stmts, stmt, phi->dst, def_counts, + "IRStmt.IfThenElse.Phi: destination tmp is out of range", + "IRStmt.IfThenElse.Phi: destination tmp is out of scope", + "IRStmt.IfThenElse: destination tmp is assigned more than once"); +} + static -void tcExpr ( const IRSB* bb, const IRStmt* stmt, const IRExpr* expr, - IRType gWordTy ) +void tcExpr(const IRSB* bb, const IRStmtVec* stmts, const IRStmt* stmt, + const IRExpr* expr, IRType gWordTy) { Int i; IRType t_dst, t_arg1, t_arg2, t_arg3, t_arg4; @@ -4151,8 +4599,8 @@ void tcExpr ( const IRSB* bb, const IRStmt* stmt, const IRExpr* expr, case Iex_RdTmp: break; case Iex_GetI: - tcExpr(bb,stmt, expr->Iex.GetI.ix, gWordTy ); - if (typeOfIRExpr(tyenv,expr->Iex.GetI.ix) != Ity_I32) + tcExpr(bb, stmts, stmt, expr->Iex.GetI.ix, gWordTy); + if (typeOfIRExpr(tyenv, expr->Iex.GetI.ix) != Ity_I32) sanityCheckFail(bb,stmt,"IRExpr.GetI.ix: not :: Ity_I32"); if (!saneIRRegArray(expr->Iex.GetI.descr)) sanityCheckFail(bb,stmt,"IRExpr.GetI.descr: invalid descr"); @@ -4160,10 +4608,10 @@ void tcExpr ( const IRSB* bb, const IRStmt* stmt, const IRExpr* expr, case Iex_Qop: { IRType ttarg1, ttarg2, ttarg3, ttarg4; const IRQop* qop = expr->Iex.Qop.details; - tcExpr(bb,stmt, qop->arg1, gWordTy ); - tcExpr(bb,stmt, qop->arg2, gWordTy ); - tcExpr(bb,stmt, qop->arg3, gWordTy ); - tcExpr(bb,stmt, qop->arg4, gWordTy ); + tcExpr(bb, stmts, stmt, qop->arg1, gWordTy); + tcExpr(bb, stmts, stmt, qop->arg2, gWordTy); + tcExpr(bb, stmts, stmt, qop->arg3, gWordTy); + tcExpr(bb, stmts, stmt, qop->arg4, gWordTy); typeOfPrimop(qop->op, &t_dst, 
&t_arg1, &t_arg2, &t_arg3, &t_arg4); if (t_arg1 == Ity_INVALID || t_arg2 == Ity_INVALID @@ -4212,9 +4660,9 @@ void tcExpr ( const IRSB* bb, const IRStmt* stmt, const IRExpr* expr, case Iex_Triop: { IRType ttarg1, ttarg2, ttarg3; const IRTriop *triop = expr->Iex.Triop.details; - tcExpr(bb,stmt, triop->arg1, gWordTy ); - tcExpr(bb,stmt, triop->arg2, gWordTy ); - tcExpr(bb,stmt, triop->arg3, gWordTy ); + tcExpr(bb, stmts, stmt, triop->arg1, gWordTy); + tcExpr(bb, stmts, stmt, triop->arg2, gWordTy); + tcExpr(bb, stmts, stmt, triop->arg3, gWordTy); typeOfPrimop(triop->op, &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4); if (t_arg1 == Ity_INVALID || t_arg2 == Ity_INVALID @@ -4256,8 +4704,8 @@ void tcExpr ( const IRSB* bb, const IRStmt* stmt, const IRExpr* expr, } case Iex_Binop: { IRType ttarg1, ttarg2; - tcExpr(bb,stmt, expr->Iex.Binop.arg1, gWordTy ); - tcExpr(bb,stmt, expr->Iex.Binop.arg2, gWordTy ); + tcExpr(bb, stmts, stmt, expr->Iex.Binop.arg1, gWordTy); + tcExpr(bb, stmts, stmt, expr->Iex.Binop.arg2, gWordTy); typeOfPrimop(expr->Iex.Binop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4); if (t_arg1 == Ity_INVALID || t_arg2 == Ity_INVALID @@ -4293,7 +4741,7 @@ void tcExpr ( const IRSB* bb, const IRStmt* stmt, const IRExpr* expr, break; } case Iex_Unop: - tcExpr(bb,stmt, expr->Iex.Unop.arg, gWordTy ); + tcExpr(bb, stmts, stmt, expr->Iex.Unop.arg, gWordTy); typeOfPrimop(expr->Iex.Unop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4); if (t_arg1 == Ity_INVALID || t_arg2 != Ity_INVALID @@ -4303,7 +4751,7 @@ void tcExpr ( const IRSB* bb, const IRStmt* stmt, const IRExpr* expr, sanityCheckFail(bb,stmt,"Iex.Unop: arg ty doesn't match op ty"); break; case Iex_Load: - tcExpr(bb,stmt, expr->Iex.Load.addr, gWordTy); + tcExpr(bb, stmts, stmt, expr->Iex.Load.addr, gWordTy); if (typeOfIRExpr(tyenv, expr->Iex.Load.addr) != gWordTy) sanityCheckFail(bb,stmt,"Iex.Load.addr: not :: guest word type"); if (expr->Iex.Load.end != Iend_LE && expr->Iex.Load.end != Iend_BE) @@ -4320,10 +4768,11 
@@ void tcExpr ( const IRSB* bb, const IRStmt* stmt, const IRExpr* expr, IRExpr* arg = expr->Iex.CCall.args[i]; if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg))) sanityCheckFail(bb,stmt,"Iex.CCall.args: is VECRET/GSPTR"); - tcExpr(bb,stmt, arg, gWordTy); + tcExpr(bb, stmts, stmt, arg, gWordTy); } if (expr->Iex.CCall.retty == Ity_I1) - sanityCheckFail(bb,stmt,"Iex.CCall.retty: cannot return :: Ity_I1"); + sanityCheckFail(bb,stmt, + "Iex.CCall.retty: cannot return :: Ity_I1"); for (i = 0; expr->Iex.CCall.args[i]; i++) if (typeOfIRExpr(tyenv, expr->Iex.CCall.args[i]) == Ity_I1) sanityCheckFail(bb,stmt,"Iex.CCall.arg: arg :: Ity_I1"); @@ -4333,9 +4782,9 @@ void tcExpr ( const IRSB* bb, const IRStmt* stmt, const IRExpr* expr, sanityCheckFail(bb,stmt,"Iex.Const.con: invalid const"); break; case Iex_ITE: - tcExpr(bb,stmt, expr->Iex.ITE.cond, gWordTy); - tcExpr(bb,stmt, expr->Iex.ITE.iftrue, gWordTy); - tcExpr(bb,stmt, expr->Iex.ITE.iffalse, gWordTy); + tcExpr(bb, stmts, stmt, expr->Iex.ITE.cond, gWordTy); + tcExpr(bb, stmts, stmt, expr->Iex.ITE.iftrue, gWordTy); + tcExpr(bb, stmts, stmt, expr->Iex.ITE.iffalse, gWordTy); if (typeOfIRExpr(tyenv, expr->Iex.ITE.cond) != Ity_I1) sanityCheckFail(bb,stmt,"Iex.ITE.cond: cond :: Ity_I1"); if (typeOfIRExpr(tyenv, expr->Iex.ITE.iftrue) @@ -4347,11 +4796,27 @@ void tcExpr ( const IRSB* bb, const IRStmt* stmt, const IRExpr* expr, } } +static +void tcPhi(const IRSB* bb, const IRStmtVec* stmts, const IRStmt* stmt, + const IRPhi* phi) +{ + vassert(stmt->tag == Ist_IfThenElse); + const IRTypeEnv* tyenv = bb->tyenv; + + if (typeOfIRTemp(tyenv, phi->srcThen) != typeOfIRTemp(tyenv, phi->srcElse)) { + sanityCheckFail(bb, stmt, "IRStmt.IfThenElse.Phi: 'then' and 'else' " + "tmp do not match"); + } + if (typeOfIRTemp(tyenv, phi->dst) != typeOfIRTemp(tyenv, phi->srcThen)) { + sanityCheckFail(bb, stmt, "IRStmt.IfThenElse.Phi: 'dst' and 'then' " + "tmp do not match"); + } +} static -void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) 
+void tcStmt(const IRSB* bb, const IRStmtVec* stmts, const IRStmt* stmt, + Bool require_flat, IRType gWordTy) { - Int i; IRType tyExpd, tyData; const IRTypeEnv* tyenv = bb->tyenv; switch (stmt->tag) { @@ -4372,35 +4837,35 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) "not :: guest word type"); break; case Ist_Put: - tcExpr( bb, stmt, stmt->Ist.Put.data, gWordTy ); - if (typeOfIRExpr(tyenv,stmt->Ist.Put.data) == Ity_I1) + tcExpr(bb, stmts, stmt, stmt->Ist.Put.data, gWordTy); + if (typeOfIRExpr(tyenv, stmt->Ist.Put.data) == Ity_I1) sanityCheckFail(bb,stmt,"IRStmt.Put.data: cannot Put :: Ity_I1"); break; case Ist_PutI:{ const IRPutI* puti = stmt->Ist.PutI.details; - tcExpr( bb, stmt, puti->data, gWordTy ); - tcExpr( bb, stmt, puti->ix, gWordTy ); - if (typeOfIRExpr(tyenv,puti->data) == Ity_I1) - sanityCheckFail(bb,stmt,"IRStmt.PutI.data: cannot PutI :: Ity_I1"); - if (typeOfIRExpr(tyenv,puti->data) - != puti->descr->elemTy) + tcExpr(bb, stmts, stmt, puti->data, gWordTy); + tcExpr(bb, stmts, stmt, puti->ix, gWordTy); + if (typeOfIRExpr(tyenv, puti->data) == Ity_I1) + sanityCheckFail(bb,stmt, + "IRStmt.PutI.data: cannot PutI :: Ity_I1"); + if (typeOfIRExpr(tyenv, puti->data) != puti->descr->elemTy) sanityCheckFail(bb,stmt,"IRStmt.PutI.data: data ty != elem ty"); - if (typeOfIRExpr(tyenv,puti->ix) != Ity_I32) + if (typeOfIRExpr(tyenv, puti->ix) != Ity_I32) sanityCheckFail(bb,stmt,"IRStmt.PutI.ix: not :: Ity_I32"); if (!saneIRRegArray(puti->descr)) sanityCheckFail(bb,stmt,"IRStmt.PutI.descr: invalid descr"); break; } case Ist_WrTmp: - tcExpr( bb, stmt, stmt->Ist.WrTmp.data, gWordTy ); + tcExpr(bb, stmts, stmt, stmt->Ist.WrTmp.data, gWordTy); if (typeOfIRTemp(tyenv, stmt->Ist.WrTmp.tmp) != typeOfIRExpr(tyenv, stmt->Ist.WrTmp.data)) sanityCheckFail(bb,stmt, "IRStmt.Put.Tmp: tmp and expr do not match"); break; case Ist_Store: - tcExpr( bb, stmt, stmt->Ist.Store.addr, gWordTy ); - tcExpr( bb, stmt, stmt->Ist.Store.data, gWordTy ); + tcExpr(bb, stmts, 
stmt, stmt->Ist.Store.addr, gWordTy); + tcExpr(bb, stmts, stmt, stmt->Ist.Store.data, gWordTy); if (typeOfIRExpr(tyenv, stmt->Ist.Store.addr) != gWordTy) sanityCheckFail(bb,stmt, "IRStmt.Store.addr: not :: guest word type"); @@ -4412,9 +4877,9 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) break; case Ist_StoreG: { const IRStoreG* sg = stmt->Ist.StoreG.details; - tcExpr( bb, stmt, sg->addr, gWordTy ); - tcExpr( bb, stmt, sg->data, gWordTy ); - tcExpr( bb, stmt, sg->guard, gWordTy ); + tcExpr(bb, stmts, stmt, sg->addr, gWordTy); + tcExpr(bb, stmts, stmt, sg->data, gWordTy); + tcExpr(bb, stmts, stmt, sg->guard, gWordTy); if (typeOfIRExpr(tyenv, sg->addr) != gWordTy) sanityCheckFail(bb,stmt,"IRStmtG...addr: not :: guest word type"); if (typeOfIRExpr(tyenv, sg->data) == Ity_I1) @@ -4427,9 +4892,9 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) } case Ist_LoadG: { const IRLoadG* lg = stmt->Ist.LoadG.details; - tcExpr( bb, stmt, lg->addr, gWordTy ); - tcExpr( bb, stmt, lg->alt, gWordTy ); - tcExpr( bb, stmt, lg->guard, gWordTy ); + tcExpr(bb, stmts, stmt, lg->addr, gWordTy); + tcExpr(bb, stmts, stmt, lg->alt, gWordTy); + tcExpr(bb, stmts, stmt, lg->guard, gWordTy); if (typeOfIRExpr(tyenv, lg->guard) != Ity_I1) sanityCheckFail(bb,stmt,"IRStmt.LoadG.guard: not :: Ity_I1"); if (typeOfIRExpr(tyenv, lg->addr) != gWordTy) @@ -4446,12 +4911,12 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) case Ist_CAS: { const IRCAS* cas = stmt->Ist.CAS.details; /* make sure it's definitely either a CAS or a DCAS */ - if (cas->oldHi == IRTemp_INVALID + if (cas->oldHi == IRTemp_INVALID && cas->expdHi == NULL && cas->dataHi == NULL) { /* fine; it's a single cas */ } else - if (cas->oldHi != IRTemp_INVALID + if (cas->oldHi != IRTemp_INVALID && cas->expdHi != NULL && cas->dataHi != NULL) { /* fine; it's a double cas */ } @@ -4460,7 +4925,7 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) goto bad_cas; } /* 
check the address type */ - tcExpr( bb, stmt, cas->addr, gWordTy ); + tcExpr(bb, stmts, stmt, cas->addr, gWordTy); if (typeOfIRExpr(tyenv, cas->addr) != gWordTy) goto bad_cas; /* check types on the {old,expd,data}Lo components agree */ tyExpd = typeOfIRExpr(tyenv, cas->expdLo); @@ -4533,7 +4998,7 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) } if (d->nFxState < 0 || d->nFxState > VEX_N_FXSTATE) goto bad_dirty; - for (i = 0; i < d->nFxState; i++) { + for (UInt i = 0; i < d->nFxState; i++) { if (d->fxState[i].fx == Ifx_None) goto bad_dirty; if (d->fxState[i].size <= 0) goto bad_dirty; if (d->fxState[i].nRepeats == 0) { @@ -4548,7 +5013,7 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) } /* check guard */ if (d->guard == NULL) goto bad_dirty; - tcExpr( bb, stmt, d->guard, gWordTy ); + tcExpr(bb, stmts, stmt, d->guard, gWordTy); if (typeOfIRExpr(tyenv, d->guard) != Ity_I1) sanityCheckFail(bb,stmt,"IRStmt.Dirty.guard not :: Ity_I1"); /* check types, minimally */ @@ -4559,7 +5024,7 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) sanityCheckFail(bb,stmt,"IRStmt.Dirty.dst :: Ity_I1"); } UInt nVECRETs = 0, nGSPTRs = 0; - for (i = 0; d->args[i] != NULL; i++) { + for (UInt i = 0; d->args[i] != NULL; i++) { if (i >= 32) sanityCheckFail(bb,stmt,"IRStmt.Dirty: > 32 args"); const IRExpr* arg = d->args[i]; @@ -4618,8 +5083,8 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) } break; case Ist_Exit: - tcExpr( bb, stmt, stmt->Ist.Exit.guard, gWordTy ); - if (typeOfIRExpr(tyenv,stmt->Ist.Exit.guard) != Ity_I1) + tcExpr(bb, stmts, stmt, stmt->Ist.Exit.guard, gWordTy); + if (typeOfIRExpr(tyenv, stmt->Ist.Exit.guard) != Ity_I1) sanityCheckFail(bb,stmt,"IRStmt.Exit.guard: not :: Ity_I1"); if (!saneIRConst(stmt->Ist.Exit.dst)) sanityCheckFail(bb,stmt,"IRStmt.Exit.dst: bad dst"); @@ -4629,77 +5094,156 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) if (stmt->Ist.Exit.offsIP < 16) 
sanityCheckFail(bb,stmt,"IRStmt.Exit.offsIP: too low"); break; + case Ist_IfThenElse: + tcExpr(bb, stmts, stmt, stmt->Ist.IfThenElse.details->cond, gWordTy); + if (typeOfIRExpr(tyenv, stmt->Ist.IfThenElse.details->cond) != Ity_I1) + sanityCheckFail(bb,stmt,"IRStmt.IfThenElse.cond: not :: Ity_I1"); + /* Traversing into legs and phi nodes driven from + sanityCheckIRStmtVec(). */ + break; default: vpanic("tcStmt"); } } -void sanityCheckIRSB ( const IRSB* bb, const HChar* caller, - Bool require_flat, IRType guest_word_size ) +static void sanityCheckIRPhiNodes(const IRSB* bb, const IRStmtVec* stmts, + const IRStmt* stmt, const IRPhiVec* phi_nodes, UInt def_counts[]) { - Int i; - Int n_temps = bb->tyenv->types_used; - Int* def_counts = LibVEX_Alloc_inline(n_temps * sizeof(Int)); + for (UInt i = 0; i < phi_nodes->phis_used; i++) { + const IRPhi* phi = phi_nodes->phis[i]; + useBeforeDef_IRPhi(bb, stmts, stmt, phi, def_counts); + assignedOnce_IRPhi(bb, stmts, stmt, phi, def_counts); + tcPhi(bb, stmts, stmt, phi); + } +} - if (0) - vex_printf("sanityCheck: %s\n", caller); +static +void sanityCheckIRStmtVec(const IRSB* bb, const IRStmtVec* stmts, + Bool require_flat, UInt def_counts[], + UInt id_counts[], UInt n_ids, IRType gWordTy) +{ + IRStmtVecID id = stmts->id; + if (id == IRStmtVecID_INVALID) { + vpanic("sanityCheckIRStmtVec: invalid IRStmtVec ID"); + } - vassert(guest_word_size == Ity_I32 - || guest_word_size == Ity_I64); + if (id >= n_ids) { + vex_printf("IRStmtVec's ID (%u) is larger than number of IRStmtVec's " + "(%u)\n", id, n_ids); + sanityCheckFail(bb, NULL, "IRStmtVec's ID larger than number of " + "IRStmtVec's"); + } - if (bb->stmts_used < 0 || bb->stmts_size < 8 - || bb->stmts_used > bb->stmts_size) - /* this BB is so strange we can't even print it */ - vpanic("sanityCheckIRSB: stmts array limits wierd"); + id_counts[id] += 1; + if (id_counts[id] > 1) { + sanityCheckFail(bb, NULL, "the same IRStmtVec ID used more than once"); + } - /* Ensure each temp has a 
plausible type. */ - for (i = 0; i < n_temps; i++) { - IRType ty = typeOfIRTemp(bb->tyenv,(IRTemp)i); - if (!isPlausibleIRType(ty)) { - vex_printf("Temp t%d declared with implausible type 0x%x\n", - i, (UInt)ty); - sanityCheckFail(bb,NULL,"Temp declared with implausible type"); - } + if (stmts->stmts_used < 0 || stmts->stmts_size < 8 + || stmts->stmts_used > stmts->stmts_size) { + /* this IRStmtVec is so strange we can't even print it */ + vpanic("sanityCheckIRStmtVec: stmts array limits wierd"); } - const IRStmt* stmt; + for (UInt i = 0; i < stmts->stmts_used; i++) { + const IRStmt *stmt = stmts->stmts[i]; + if (stmt == NULL) + sanityCheckFail(bb, stmt, "IRStmt: is NULL"); - /* Check for flatness, if required. */ - if (require_flat) { - for (i = 0; i < bb->stmts_used; i++) { - stmt = bb->stmts[i]; - if (!stmt) - sanityCheckFail(bb, stmt, "IRStmt: is NULL"); - if (!isFlatIRStmt(stmt)) + /* Check for flatness, if required. */ + if (require_flat) { + if (!isFlatIRStmt(stmt)) { sanityCheckFail(bb, stmt, "IRStmt: is not flat"); + } + } + + /* Count the defs of each temp. Only one def is allowed. + Also, check that each used temp has already been defd. 
*/ + useBeforeDef_Stmt(bb, stmts, stmt, def_counts); + assignedOnce_Stmt(bb, stmts, stmt, def_counts); + tcStmt(bb, stmts, stmt, require_flat, gWordTy); + + if (stmt->tag == Ist_IfThenElse) { + const IRIfThenElse* ite = stmt->Ist.IfThenElse.details; + const IRStmtVec* then_leg = ite->then_leg; + const IRStmtVec* else_leg = ite->else_leg; + + if (then_leg->parent == NULL) { + sanityCheckFail(bb, stmt, "IfThenElse.then.parent is NULL"); + } + if (else_leg->parent == NULL) { + sanityCheckFail(bb, stmt, "IfThenElse.else.parent is NULL"); + } + if (then_leg->parent != stmts) { + sanityCheckFail(bb, stmt, "IfThenElse.then.parent does not point " + "to its parent"); + } + if (else_leg->parent != stmts) { + sanityCheckFail(bb, stmt, "IfThenElse.else.parent does not point " + "to its parent"); + } + + sanityCheckIRStmtVec(bb, then_leg, require_flat, def_counts, + id_counts, n_ids, gWordTy); + sanityCheckIRStmtVec(bb, else_leg, require_flat, def_counts, + id_counts, n_ids, gWordTy); + sanityCheckIRPhiNodes(bb, stmts, stmt, ite->phi_nodes, def_counts); } - if (!isIRAtom(bb->next)) - sanityCheckFail(bb, NULL, "bb->next is not an atom"); } +} - /* Count the defs of each temp. Only one def is allowed. - Also, check that each used temp has already been defd. */ +/* Sanity checks basic block of IR. + Also checks for IRTyEnvID uniqueness. */ +void sanityCheckIRSB(const IRSB* bb, const HChar* caller, Bool require_flat, + IRType gWordTy) +{ + UInt n_ids = bb->id_seq; + UInt *id_counts = LibVEX_Alloc_inline(n_ids * sizeof(UInt)); + for (UInt i = 0; i < n_ids; i++) { + id_counts[i] = 0; + } - for (i = 0; i < n_temps; i++) + const IRTypeEnv* tyenv = bb->tyenv; + UInt n_temps = tyenv->used; + UInt *def_counts = LibVEX_Alloc_inline(n_temps * sizeof(UInt)); + for (UInt i = 0; i < n_temps; i++) { def_counts[i] = 0; + } + + if (0) + vex_printf("sanityCheck: %s\n", caller); - for (i = 0; i < bb->stmts_used; i++) { - stmt = bb->stmts[i]; - /* Check any temps used by this statement. 
*/ - useBeforeDef_Stmt(bb,stmt,def_counts); + vassert(gWordTy == Ity_I32 || gWordTy == Ity_I64); - /* Now make note of any temps defd by this statement. */ - assignedOnce_Stmt(bb, stmt, def_counts, n_temps); + /* Ensure each temp has a plausible type. */ + for (UInt i = 0; i < n_temps; i++) { + IRTemp temp = (IRTemp) i; + IRType ty = typeOfIRTemp(tyenv, temp); + if (!isPlausibleIRType(ty)) { + vex_printf("Temp "); + ppIRTemp(temp); + vex_printf(" declared with implausible type 0x%x\n", (UInt) ty); + sanityCheckFail(bb, NULL, "Temp declared with implausible type"); + } } - /* Typecheck everything. */ - for (i = 0; i < bb->stmts_used; i++) - tcStmt(bb, bb->stmts[i], guest_word_size); - if (typeOfIRExpr(bb->tyenv,bb->next) != guest_word_size) + sanityCheckIRStmtVec(bb, bb->stmts, require_flat, def_counts, id_counts, + n_ids, gWordTy); + + if (require_flat) { + if (!isIRAtom(bb->next)) { + sanityCheckFail(bb, NULL, "bb->next is not an atom"); + } + } + + /* Typecheck also next destination. */ + if (typeOfIRExpr(bb->tyenv, bb->next) != gWordTy) { sanityCheckFail(bb, NULL, "bb->next field has wrong type"); + } /* because it would intersect with host_EvC_* */ - if (bb->offsIP < 16) + if (bb->offsIP < 16) { sanityCheckFail(bb, NULL, "bb->offsIP: too low"); + } } /*---------------------------------------------------------------*/ |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:26:29
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=d630ee98787f95c717f7c9ae646dcf3f019ec088 commit d630ee98787f95c717f7c9ae646dcf3f019ec088 Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 06:15:58 2017 +0200 Introduce If-Then-Else concept and Phi nodes into VEX IR. Diff: --- VEX/pub/libvex_ir.h | 799 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 516 insertions(+), 283 deletions(-) diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index 57fa9b6..af56186 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -56,13 +56,15 @@ Each IRSB contains three things: - a type environment, which indicates the type of each temporary value present in the IRSB - - a list of statements, which represent code - - a jump that exits from the end the IRSB - Because the blocks are multiple-exit, there can be additional - conditional exit statements that cause control to leave the IRSB - before the final exit. Also because of this, IRSBs can cover - multiple non-consecutive sequences of code (up to 3). These are - recorded in the type VexGuestExtents (see libvex.h). + - a vector of statements, which represent code + - a jump that exits from the end of the IRSB + Flow control can leave the IRSB before the final exit only in a leg of an + "if-then-else" statement. A leg of an "if-then-else" statement is just + another vector of statements. + "If-then-else" statements can be nested, however this is currently not + supported. + IRSBs can cover multiple non-consecutive sequences of code (up to 3). + These are recorded in the type VexGuestExtents (see libvex.h). Statements and expressions ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -397,7 +399,7 @@ typedef UInt IRTemp; /* Pretty-print an IRTemp. 
*/ extern void ppIRTemp ( IRTemp ); -#define IRTemp_INVALID ((IRTemp)0xFFFFFFFF) +#define IRTemp_INVALID ((IRTemp) 0xFFFFFFFF) /* --------------- Primops (arity 1,2,3 and 4) --------------- */ @@ -2693,9 +2695,188 @@ extern IRLoadG* mkIRLoadG ( IREndness end, IRLoadGOp cvt, IRTemp dst, IRExpr* addr, IRExpr* alt, IRExpr* guard ); +/* ------------------ Phi Nodes ------------------ */ + +/* Assigns result of phi(src1, src2) to a temporary. + A "phi" function is special "phony" function that does not have its + corresponding machine operation. Let's consider this example: + if (cond) { + t1 = 0x3:I64 + } else { + t1 = 0x4:I64 + } + However this is not possible under SSA rules (a temporary cannot be assigned + more than once). Now the "phi" function comes handy: + if (cond) { + t2 = 0x3:I64 + } else { + t3 = 0x4:I64 + } + t1 = phi(t2,t3) +*/ +typedef + struct { + IRTemp dst; + IRTemp srcThen; + IRTemp srcElse; + } + IRPhi; + +typedef + struct { + IRPhi** phis; + UInt phis_size; + UInt phis_used; + } + IRPhiVec; + +extern void ppIRPhi(const IRPhi*); +extern IRPhi* mkIRPhi(IRTemp dst, IRTemp srcThen, IRTemp srcElse); +extern IRPhi* deepCopyIRPhi(const IRPhi*); +extern void ppIRPhiVec(const IRPhiVec*, UInt depth); +extern IRPhiVec* emptyIRPhiVec(void); +extern IRPhiVec* deepCopyIRPhiVec(const IRPhiVec*); + +extern void addIRPhiToIRPhiVec(IRPhiVec* , IRPhi*); + +/* ----------------- IRTemp Defined Set ----------------- */ + +/* An IRTemp is defined in an IRStmtVec. By keeping track of where every IRTemp + is defined, it is possible to reason about IRTemp's scope. + + Let's have this IRStmtVec hierarchy: + IRSB + |- IRStmtVec #0 + |- IRStmtVec #1 + |- IRStmtVec #2 + |- IRStmtVec #3 + So an IRTemp defined in IRStmtVec #2 is valid (in scope) only + in IRStmtVec's #2 and #3; and out of scope in IRStmtVec's #0 and #1. + + Every IRStmtVec has its IRTempDefSet structure. This is a bit set indexed + by IRTemp; value 1 means that a particular IRTemp is defined there. 
+ + 'bits' array does not grow each time an IRTemp is added to IRSB; it grows + lazily only when an IRTemp is marked as defined in a particular IRStmtVec. + */ +typedef + struct { + UChar* set; // a bit set, use isIRTempDefined() for access + UInt slots_used; + UInt slots_size; + } + IRTempDefSet; + +static inline Bool isIRTempDefined(const IRTempDefSet* defset, IRTemp tmp) +{ + if ((tmp / sizeof(UChar)) < defset->slots_size) { + UInt mask = (1 << (tmp % sizeof(UChar))); + return toBool(defset->set[tmp / sizeof(UChar)] & mask); + } + return False; +} + +extern void setIRTempDefined(IRTempDefSet* defset, IRTemp tmp); +extern void clearIRTempDefSet(IRTempDefSet* defset); +extern void ppIRTempDefSet(const IRTempDefSet* defset, UInt depth); +extern IRTempDefSet* emptyIRTempDefSet(void); +extern IRTempDefSet* deepCopyIRTempDefSet(const IRTempDefSet* defset); + + +/* --------------- If-Then-Else control flow diamond --------------- */ + +/* If-Then-Else control flow diamond. It contains: + - Guard controling whether "then" or "else" leg is taken + - A hint which leg is more likely to be taken (hot path vs cold path) + - "then" and "else" legs with vectors of statements + At the moment, nested "if-then-else" statements are not supported. + - Phi nodes, which are used to merge temporaries from "then" and "else" legs + + A leg can either end with an unconditional exit or join the main flow. + At the moment, unconditional exits are not supported. +*/ + +typedef + enum { + IfThenElse_ThenLikely=0x1E00, + IfThenElse_ElseLikely + } + IRIfThenElse_Hint; + +typedef + struct _IRStmtVec + IRStmtVec; + +typedef + struct _IRTypeEnv + IRTypeEnv; + +typedef + struct { + IRExpr* cond; + IRIfThenElse_Hint hint; + IRStmtVec* then_leg; + IRStmtVec* else_leg; + IRPhiVec* phi_nodes; + } + IRIfThenElse; + +extern void ppIRIfThenElse_Hint(IRIfThenElse_Hint hint); +/* Pretty print only If-Then-Else preamble: condition and hint. Not the legs. 
*/ +extern void ppIRIfThenElseCondHint(const IRIfThenElse* ite); +extern void ppIRIfThenElse(const IRIfThenElse* ite, const IRTypeEnv* tyenv, + UInt depth); +extern IRIfThenElse* mkIRIfThenElse(IRExpr* cond, IRIfThenElse_Hint hint, + IRStmtVec* then_leg, IRStmtVec* else_leg, + IRPhiVec* phi_nodes); + /* ------------------ Statements ------------------ */ +/* IRStmt and IRStmtVec are mutually recursive. */ +typedef + struct _IRStmt + IRStmt; + +/* Uniquely identifies IRStmtVec in an IRSB, no matter how deeply nested. */ +typedef UShort IRStmtVecID; + +#define IRStmtVecID_INVALID ((IRStmtVecID) 0xFFFF) + +/* Vector of statements which contains: + - Statements themselves + - A unique IRStmtVecID + - Parent, which points to the parent IRStmtVec. Because "if-then-else" + statements cannot be currently nested, the parent is either NULL or points + to IRStmtVec #0. + - A set which keeps track of which IRTemp's are defined in this IRStmtVec. +*/ +struct _IRStmtVec { + IRStmt** stmts; + UInt stmts_size; + UInt stmts_used; + IRStmtVecID id; + IRStmtVec* parent; + IRTempDefSet* defset; +}; + +/* Pretty-prints a vector of statements. If 'tyenv' is not NULL, pretty-prints + IRStmtVec's defset using nicer ppIRTypeEnvDefd(). */ +extern void ppIRStmtVec(const IRStmtVec* stmts, const IRTypeEnv* tyenv, + UInt depth); + +/* Allocates an empty IRStmtVec with an invalid IRStmtVecID. + Such an IRStmtVec needs to have a valid IRStmtVecID - get it from + nextIRStmtVecID(). Only after this is done, then such an IRStmtVec is ready + for newIRTemp() to give out new temporaries. + Nested IRStmtVec also needs to have correctly set its parent. + + Function addEmptyIfThenElse() can be used conveniently instead. */ +extern IRStmtVec* emptyIRStmtVec(void); + +extern IRStmtVec* deepCopyIRStmtVec(const IRStmtVec* src, IRStmtVec* parent); + + /* The different kinds of statements. Their meaning is explained below in the comments for IRStmt. 
@@ -2708,7 +2889,7 @@ extern IRLoadG* mkIRLoadG ( IREndness end, IRLoadGOp cvt, typedef enum { - Ist_NoOp=0x1E00, + Ist_NoOp=0x1F00, Ist_IMark, /* META */ Ist_AbiHint, /* META */ Ist_Put, @@ -2721,8 +2902,9 @@ typedef Ist_LLSC, Ist_Dirty, Ist_MBE, - Ist_Exit - } + Ist_Exit, + Ist_IfThenElse + } IRStmtTag; /* A statement. Stored as a tagged union. 'tag' indicates what kind @@ -2734,237 +2916,264 @@ typedef For each kind of statement, we show what it looks like when pretty-printed with ppIRStmt(). */ -typedef - struct _IRStmt { - IRStmtTag tag; - union { - /* A no-op (usually resulting from IR optimisation). Can be - omitted without any effect. - - ppIRStmt output: IR-NoOp - */ - struct { - } NoOp; - - /* META: instruction mark. Marks the start of the statements - that represent a single machine instruction (the end of - those statements is marked by the next IMark or the end of - the IRSB). Contains the address and length of the - instruction. - - It also contains a delta value. The delta must be - subtracted from a guest program counter value before - attempting to establish, by comparison with the address - and length values, whether or not that program counter - value refers to this instruction. For x86, amd64, ppc32, - ppc64 and arm, the delta value is zero. For Thumb - instructions, the delta value is one. This is because, on - Thumb, guest PC values (guest_R15T) are encoded using the - top 31 bits of the instruction address and a 1 in the lsb; - hence they appear to be (numerically) 1 past the start of - the instruction they refer to. IOW, guest_R15T on ARM - holds a standard ARM interworking address. - - ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------, - eg. 
------ IMark(0x4000792, 5, 0) ------, - */ - struct { - Addr addr; /* instruction address */ - UInt len; /* instruction length */ - UChar delta; /* addr = program counter as encoded in guest state - - delta */ - } IMark; - - /* META: An ABI hint, which says something about this - platform's ABI. - - At the moment, the only AbiHint is one which indicates - that a given chunk of address space, [base .. base+len-1], - has become undefined. This is used on amd64-linux and - some ppc variants to pass stack-redzoning hints to whoever - wants to see them. It also indicates the address of the - next (dynamic) instruction that will be executed. This is - to help Memcheck to origin tracking. - - ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ====== - eg. ====== AbiHint(t1, 16, t2) ====== - */ - struct { - IRExpr* base; /* Start of undefined chunk */ - Int len; /* Length of undefined chunk */ - IRExpr* nia; /* Address of next (guest) insn */ - } AbiHint; - - /* Write a guest register, at a fixed offset in the guest state. - ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1 - */ - struct { - Int offset; /* Offset into the guest state */ - IRExpr* data; /* The value to write */ - } Put; - - /* Write a guest register, at a non-fixed offset in the guest - state. See the comment for GetI expressions for more - information. - - ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>, - eg. PUTI(64:8xF64)[t5,0] = t1 - */ - struct { - IRPutI* details; - } PutI; - - /* Assign a value to a temporary. Note that SSA rules require - each tmp is only assigned to once. IR sanity checking will - reject any block containing a temporary which is not assigned - to exactly once. - - ppIRStmt output: t<tmp> = <data>, eg. t1 = 3 - */ - struct { - IRTemp tmp; /* Temporary (LHS of assignment) */ - IRExpr* data; /* Expression (RHS of assignment) */ - } WrTmp; - - /* Write a value to memory. This is a normal store, not a - Store-Conditional. 
To represent a Store-Conditional, - instead use IRStmt.LLSC. - ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2 - */ - struct { - IREndness end; /* Endianness of the store */ - IRExpr* addr; /* store address */ - IRExpr* data; /* value to write */ - } Store; - - /* Guarded store. Note that this is defined to evaluate all - expression fields (addr, data) even if the guard evaluates - to false. - ppIRStmt output: - if (<guard>) ST<end>(<addr>) = <data> */ - struct { - IRStoreG* details; - } StoreG; - - /* Guarded load. Note that this is defined to evaluate all - expression fields (addr, alt) even if the guard evaluates - to false. - ppIRStmt output: - t<tmp> = if (<guard>) <cvt>(LD<end>(<addr>)) else <alt> */ - struct { - IRLoadG* details; - } LoadG; - - /* Do an atomic compare-and-swap operation. Semantics are - described above on a comment at the definition of IRCAS. - - ppIRStmt output: - t<tmp> = CAS<end>(<addr> :: <expected> -> <new>) - eg - t1 = CASle(t2 :: t3->Add32(t3,1)) - which denotes a 32-bit atomic increment - of a value at address t2 - - A double-element CAS may also be denoted, in which case <tmp>, - <expected> and <new> are all pairs of items, separated by - commas. - */ - struct { - IRCAS* details; - } CAS; - - /* Either Load-Linked or Store-Conditional, depending on - STOREDATA. - - If STOREDATA is NULL then this is a Load-Linked, meaning - that data is loaded from memory as normal, but a - 'reservation' for the address is also lodged in the - hardware. - - result = Load-Linked(addr, end) - - The data transfer type is the type of RESULT (I32, I64, - etc). ppIRStmt output: - - result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1) - - If STOREDATA is not NULL then this is a Store-Conditional, - hence: - - result = Store-Conditional(addr, storedata, end) - - The data transfer type is the type of STOREDATA and RESULT - has type Ity_I1. The store may fail or succeed depending - on the state of a previously lodged reservation on this - address. 
RESULT is written 1 if the store succeeds and 0 - if it fails. eg ppIRStmt output: - - result = ( ST<end>-Cond(<addr>) = <storedata> ) - eg t3 = ( STbe-Cond(t1, t2) ) - - In all cases, the address must be naturally aligned for - the transfer type -- any misaligned addresses should be - caught by a dominating IR check and side exit. This - alignment restriction exists because on at least some - LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on - misaligned addresses, and we have to actually generate - stwcx. on the host, and we don't want it trapping on the - host. - - Summary of rules for transfer type: - STOREDATA == NULL (LL): - transfer type = type of RESULT - STOREDATA != NULL (SC): - transfer type = type of STOREDATA, and RESULT :: Ity_I1 - */ - struct { - IREndness end; - IRTemp result; - IRExpr* addr; - IRExpr* storedata; /* NULL => LL, non-NULL => SC */ - } LLSC; - - /* Call (possibly conditionally) a C function that has side - effects (ie. is "dirty"). See the comments above the - IRDirty type declaration for more information. - - ppIRStmt output: - t<tmp> = DIRTY <guard> <effects> - ::: <callee>(<args>) - eg. - t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4) - ::: foo{0x380035f4}(t2) - */ - struct { - IRDirty* details; - } Dirty; - - /* A memory bus event - a fence, or acquisition/release of the - hardware bus lock. IR optimisation treats all these as fences - across which no memory references may be moved. - ppIRStmt output: MBusEvent-Fence, - MBusEvent-BusLock, MBusEvent-BusUnlock. - */ - struct { - IRMBusEvent event; - } MBE; - - /* Conditional exit from the middle of an IRSB. - ppIRStmt output: if (<guard>) goto {<jk>} <dst> - eg. if (t69) goto {Boring} 0x4000AAA:I32 - If <guard> is true, the guest state is also updated by - PUT-ing <dst> at <offsIP>. This is done because a - taken exit must update the guest program counter. 
- */ - struct { - IRExpr* guard; /* Conditional expression */ - IRConst* dst; /* Jump target (constant only) */ - IRJumpKind jk; /* Jump kind */ - Int offsIP; /* Guest state offset for IP */ - } Exit; - } Ist; - } - IRStmt; +struct _IRStmt { + IRStmtTag tag; + union { + /* A no-op (usually resulting from IR optimisation). Can be + omitted without any effect. + + ppIRStmt output: IR-NoOp + */ + struct { + } NoOp; + + /* META: instruction mark. Marks the start of the statements + that represent a single machine instruction (the end of + those statements is marked by the next IMark or the end of + the IRSB). Contains the address and length of the + instruction. + + It also contains a delta value. The delta must be + subtracted from a guest program counter value before + attempting to establish, by comparison with the address + and length values, whether or not that program counter + value refers to this instruction. For x86, amd64, ppc32, + ppc64 and arm, the delta value is zero. For Thumb + instructions, the delta value is one. This is because, on + Thumb, guest PC values (guest_R15T) are encoded using the + top 31 bits of the instruction address and a 1 in the lsb; + hence they appear to be (numerically) 1 past the start of + the instruction they refer to. IOW, guest_R15T on ARM + holds a standard ARM interworking address. + + ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------, + eg. ------ IMark(0x4000792, 5, 0) ------, + */ + struct { + Addr addr; /* instruction address */ + UInt len; /* instruction length */ + UChar delta; /* addr = program counter as encoded in guest state + - delta */ + } IMark; + + /* META: An ABI hint, which says something about this + platform's ABI. + + At the moment, the only AbiHint is one which indicates + that a given chunk of address space, [base .. base+len-1], + has become undefined. This is used on amd64-linux and + some ppc variants to pass stack-redzoning hints to whoever + wants to see them. 
It also indicates the address of the + next (dynamic) instruction that will be executed. This is + to help Memcheck to origin tracking. + + ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ====== + eg. ====== AbiHint(t1, 16, t2) ====== + */ + struct { + IRExpr* base; /* Start of undefined chunk */ + Int len; /* Length of undefined chunk */ + IRExpr* nia; /* Address of next (guest) insn */ + } AbiHint; + + /* Write a guest register, at a fixed offset in the guest state. + ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1 + */ + struct { + Int offset; /* Offset into the guest state */ + IRExpr* data; /* The value to write */ + } Put; + + /* Write a guest register, at a non-fixed offset in the guest + state. See the comment for GetI expressions for more + information. + + ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>, + eg. PUTI(64:8xF64)[t5,0] = t1 + */ + struct { + IRPutI* details; + } PutI; + + /* Assign a value to a temporary. Note that SSA rules require + each tmp is only assigned to once. IR sanity checking will + reject any block containing a temporary which is not assigned + to exactly once. + + ppIRStmt output: t<tmp> = <data>, eg. t1 = 3 + */ + struct { + IRTemp tmp; /* Temporary (LHS of assignment) */ + IRExpr* data; /* Expression (RHS of assignment) */ + } WrTmp; + + /* Write a value to memory. This is a normal store, not a + Store-Conditional. To represent a Store-Conditional, + instead use IRStmt.LLSC. + ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2 + */ + struct { + IREndness end; /* Endianness of the store */ + IRExpr* addr; /* store address */ + IRExpr* data; /* value to write */ + } Store; + + /* Guarded store. Note that this is defined to evaluate all + expression fields (addr, data) even if the guard evaluates + to false. + ppIRStmt output: + if (<guard>) ST<end>(<addr>) = <data> */ + struct { + IRStoreG* details; + } StoreG; + + /* Guarded load. 
Note that this is defined to evaluate all + expression fields (addr, alt) even if the guard evaluates + to false. + ppIRStmt output: + t<tmp> = if (<guard>) <cvt>(LD<end>(<addr>)) else <alt> */ + struct { + IRLoadG* details; + } LoadG; + + /* Do an atomic compare-and-swap operation. Semantics are + described above on a comment at the definition of IRCAS. + + ppIRStmt output: + t<tmp> = CAS<end>(<addr> :: <expected> -> <new>) + eg + t1 = CASle(t2 :: t3->Add32(t3,1)) + which denotes a 32-bit atomic increment + of a value at address t2 + + A double-element CAS may also be denoted, in which case <tmp>, + <expected> and <new> are all pairs of items, separated by + commas. + */ + struct { + IRCAS* details; + } CAS; + + /* Either Load-Linked or Store-Conditional, depending on + STOREDATA. + + If STOREDATA is NULL then this is a Load-Linked, meaning + that data is loaded from memory as normal, but a + 'reservation' for the address is also lodged in the + hardware. + + result = Load-Linked(addr, end) + + The data transfer type is the type of RESULT (I32, I64, + etc). ppIRStmt output: + + result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1) + + If STOREDATA is not NULL then this is a Store-Conditional, + hence: + + result = Store-Conditional(addr, storedata, end) + + The data transfer type is the type of STOREDATA and RESULT + has type Ity_I1. The store may fail or succeed depending + on the state of a previously lodged reservation on this + address. RESULT is written 1 if the store succeeds and 0 + if it fails. eg ppIRStmt output: + + result = ( ST<end>-Cond(<addr>) = <storedata> ) + eg t3 = ( STbe-Cond(t1, t2) ) + + In all cases, the address must be naturally aligned for + the transfer type -- any misaligned addresses should be + caught by a dominating IR check and side exit. This + alignment restriction exists because on at least some + LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on + misaligned addresses, and we have to actually generate + stwcx. 
on the host, and we don't want it trapping on the + host. + + Summary of rules for transfer type: + STOREDATA == NULL (LL): + transfer type = type of RESULT + STOREDATA != NULL (SC): + transfer type = type of STOREDATA, and RESULT :: Ity_I1 + */ + struct { + IREndness end; + IRTemp result; + IRExpr* addr; + IRExpr* storedata; /* NULL => LL, non-NULL => SC */ + } LLSC; + + /* Call (possibly conditionally) a C function that has side + effects (ie. is "dirty"). See the comments above the + IRDirty type declaration for more information. + + ppIRStmt output: + t<tmp> = DIRTY <guard> <effects> + ::: <callee>(<args>) + eg. + t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4) + ::: foo{0x380035f4}(t2) + */ + struct { + IRDirty* details; + } Dirty; + + /* A memory bus event - a fence, or acquisition/release of the + hardware bus lock. IR optimisation treats all these as fences + across which no memory references may be moved. + ppIRStmt output: MBusEvent-Fence, + MBusEvent-BusLock, MBusEvent-BusUnlock. + */ + struct { + IRMBusEvent event; + } MBE; + + /* Conditional exit from the middle of an IRSB. + ppIRStmt output: if (<guard>) goto {<jk>} <dst> + eg. if (t0:69) goto {Boring} 0x4000AAA:I32 + If <guard> is true, the guest state is also updated by + PUT-ing <dst> at <offsIP>. This is done because a + taken exit must update the guest program counter. + TODO-JIT: The condition is going to disappear, making it + unconditional exit. + */ + struct { + IRExpr* guard; /* Conditional expression */ + IRConst* dst; /* Jump target (constant only) */ + IRJumpKind jk; /* Jump kind */ + Int offsIP; /* Guest state offset for IP */ + } Exit; + + /* If-Then-Else control flow diamond. See IRIfThenElse for details. + + ppIRIfThenElse output: + if (<cond>) [<hint>] then { + <IRTempDefSet> + <IRStmtVec> + } else { + <IRTempDefSet> + <IRStmtVec> + } + <phi-nodes> + + eg. 
if (t3) [IfThenElse_ThenLikely] then { + t4:I32 t7:I32 + + t4=0x2 + t7=Add32(t2,t1) + } else { + t5:I32 + + t5=0x3 + } + t6=phi(t4,t5) */ + struct { + IRIfThenElse* details; + } IfThenElse; + } Ist; +}; /* Statement constructors. */ extern IRStmt* IRStmt_NoOp ( void ); @@ -2985,45 +3194,63 @@ extern IRStmt* IRStmt_Dirty ( IRDirty* details ); extern IRStmt* IRStmt_MBE ( IRMBusEvent event ); extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst, Int offsIP ); +extern IRStmt* IRStmt_IfThenElse(IRExpr* cond, IRIfThenElse_Hint hint, + IRStmtVec* then_leg, IRStmtVec* else_leg, + IRPhiVec* phi_nodes); -/* Deep-copy an IRStmt. */ -extern IRStmt* deepCopyIRStmt ( const IRStmt* ); +/* Deep-copy an IRStmt. + Parent is required for "if-then-else" statements. */ +extern IRStmt* deepCopyIRStmt(const IRStmt* src, IRStmtVec* parent); -/* Pretty-print an IRStmt. */ -extern void ppIRStmt ( const IRStmt* ); +/* Pretty-prints an IRStmt. 'tyenv' is eventually used for pretty-printing + nested IRStmtVec's and can be NULL. */ +extern void ppIRStmt(const IRStmt* stmt, const IRTypeEnv* tyenv, UInt depth); /* ------------------ Basic Blocks ------------------ */ -/* Type environments: a bunch of statements, expressions, etc, are - incomplete without an environment indicating the type of each - IRTemp. So this provides one. IR temporaries are really just - unsigned ints and so this provides an array, 0 .. n_types_used-1 of - them. +/* Type environments: a bunch of statements, expressions, etc, are incomplete + without an environment indicating the type of each IRTemp and its scope. + So this provides one. 
IR temporaries are really just unsigned ints so they + can used to index these two arrays: + - 'types' which gives IRTemp's type + - 'ids' which gives ID of the defining IRStmtVec */ -typedef - struct { - IRType* types; - Int types_size; - Int types_used; - } - IRTypeEnv; +struct _IRTypeEnv { + IRType* types; + IRStmtVecID* ids; + UInt size; + UInt used; +}; -/* Obtain a new IRTemp */ -extern IRTemp newIRTemp ( IRTypeEnv*, IRType ); +/* Obtain a new IRTemp. New IRTemp is allocated from 'tyenv' and is marked + as defined in 'stmts'->defset. */ +extern IRTemp newIRTemp(IRTypeEnv* tyenv, IRStmtVec* stmts, IRType); /* Deep-copy a type environment */ extern IRTypeEnv* deepCopyIRTypeEnv ( const IRTypeEnv* ); -/* Pretty-print a type environment */ +/* Pretty-print a type environment. Use ppIRTypeEnvDefd() if possible which + combines also information from an IRTempDefSet to print only IRTemp's which + are defined in a given IRStmtVec. */ extern void ppIRTypeEnv ( const IRTypeEnv* ); +/* Much like ppIRTypeEnv() but prints only IRTemp's which are defined in a given + IRStmtVec. */ +extern void ppIRTypeEnvDefd(const IRTypeEnv* tyenv, const IRTempDefSet* defset, + UInt depth); + +/* Ensures that this IRTypeEnv can hold at least new_size types and ids. + Useful for certain bulk transformations. 
*/ +extern void ensureSpaceInIRTypeEnv(IRTypeEnv*, UInt new_size); + /* Code blocks, which in proper compiler terminology are superblocks (single entry, multiple exit code sequences) contain: - - A table giving a type for each temp (the "type environment") - - An expandable array of statements + - A type environment (giving type for each temp and where it is defined) + - A vector of statements + - A sequence used to get a unique IRStmtVecID for nested IRStmtVec's - An expression of type 32 or 64 bits, depending on the guest's word size, indicating the next destination if the block executes all the way to the end, without a side exit @@ -3036,17 +3263,16 @@ extern void ppIRTypeEnv ( const IRTypeEnv* ); */ typedef struct { - IRTypeEnv* tyenv; - IRStmt** stmts; - Int stmts_size; - Int stmts_used; - IRExpr* next; - IRJumpKind jumpkind; - Int offsIP; + IRTypeEnv* tyenv; + IRStmtVec* stmts; + IRStmtVecID id_seq; + IRExpr* next; + IRJumpKind jumpkind; + Int offsIP; } IRSB; -/* Allocate a new, uninitialised IRSB */ +/* Allocates an empty IRSB. The corresponding IRStmtVec has ID #0. */ extern IRSB* emptyIRSB ( void ); /* Deep-copy an IRSB */ @@ -3059,17 +3285,24 @@ extern IRSB* deepCopyIRSBExceptStmts ( const IRSB* ); /* Pretty-print an IRSB */ extern void ppIRSB ( const IRSB* ); -/* Append an IRStmt to an IRSB */ -extern void addStmtToIRSB ( IRSB*, IRStmt* ); +/* Append an IRStmt to the main IRStmtVec (ID #0) of an IRSB. + Function addStmtToIRStmtVec() should be used instead. */ +extern void addStmtToIRSB(IRSB*, IRStmt*) __attribute__ ((deprecated)); +extern void addStmtToIRStmtVec(IRStmtVec*, IRStmt*); + +extern IRStmtVecID nextIRStmtVecID(IRSB*); + +/* Allocates an empty IfThenElse, assigns it a valid IRStmtVecID + and sets the parent for both then and else legs. + The returned IRStmt is added to the parent IRStmtVec and ready to be used. 
*/ +extern IRStmt *addEmptyIfThenElse(IRSB* bb, IRStmtVec* parent, IRExpr* cond, + IRIfThenElse_Hint hint); /*---------------------------------------------------------------*/ /*--- Helper functions for the IR ---*/ /*---------------------------------------------------------------*/ -/* For messing with IR type environments */ -extern IRTypeEnv* emptyIRTypeEnv ( void ); - /* What is the type of this expression? */ extern IRType typeOfIRConst ( const IRConst* ); extern IRType typeOfIRTemp ( const IRTypeEnv*, IRTemp ); @@ -3080,12 +3313,12 @@ extern void typeOfIRLoadGOp ( IRLoadGOp cvt, /*OUT*/IRType* t_res, /*OUT*/IRType* t_arg ); -/* Sanity check a BB of IR */ -extern void sanityCheckIRSB ( const IRSB* bb, - const HChar* caller, - Bool require_flatness, - IRType guest_word_size ); -extern Bool isFlatIRStmt ( const IRStmt* ); +/* Sanity check a BB of IR. */ +extern void sanityCheckIRSB(const IRSB* bb, + const HChar* caller, + Bool require_flatness, + IRType guest_word_size); +extern Bool isFlatIRSB(const IRSB*); /* Is this any value actually in the enumeration 'IRType' ? */ extern Bool isPlausibleIRType ( IRType ty ); @@ -3095,7 +3328,7 @@ extern Bool isPlausibleIRType ( IRType ty ); /*--- IR injection ---*/ /*---------------------------------------------------------------*/ -void vex_inject_ir(IRSB *, IREndness); +void vex_inject_ir(IRSB* , IREndness); #endif /* ndef __LIBVEX_IR_H */ |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:26:18
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=45b9d41ea0fd40e102557116e063d9eea8de3c9f commit 45b9d41ea0fd40e102557116e063d9eea8de3c9f Author: Ivo Raisr <iv...@iv...> Date: Mon Aug 28 23:14:55 2017 +0200 Focus initially on x86 architecture and Memcheck tool. Diff: --- Makefile.am | 26 +++++++++++++++----------- Makefile.vex.am | 28 ++-------------------------- 2 files changed, 17 insertions(+), 37 deletions(-) diff --git a/Makefile.am b/Makefile.am index fdce3cf..85f7dc4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -4,17 +4,21 @@ AUTOMAKE_OPTIONS = foreign 1.10 dist-bzip2 include $(top_srcdir)/Makefile.all.am TOOLS = memcheck \ - cachegrind \ - callgrind \ - massif \ - lackey \ - none \ - helgrind \ - drd - -EXP_TOOLS = exp-sgcheck \ - exp-bbv \ - exp-dhat + none + +# TODO-JIT: +# cachegrind \ +# callgrind \ +# massif \ +# lackey \ +# helgrind \ +# drd + +EXP_TOOLS = +# TODO-JIT: +# exp-sgcheck \ +# exp-bbv \ +# exp-dhat # Put docs last because building the HTML is slow and we want to get # everything else working before we try it. 
diff --git a/Makefile.vex.am b/Makefile.vex.am index 4ad5ffa..e0c17e4 100644 --- a/Makefile.vex.am +++ b/Makefile.vex.am @@ -125,18 +125,6 @@ LIBVEX_SOURCES_COMMON = \ priv/guest_generic_x87.c \ priv/guest_x86_helpers.c \ priv/guest_x86_toIR.c \ - priv/guest_amd64_helpers.c \ - priv/guest_amd64_toIR.c \ - priv/guest_ppc_helpers.c \ - priv/guest_ppc_toIR.c \ - priv/guest_arm_helpers.c \ - priv/guest_arm_toIR.c \ - priv/guest_arm64_helpers.c \ - priv/guest_arm64_toIR.c \ - priv/guest_s390_helpers.c \ - priv/guest_s390_toIR.c \ - priv/guest_mips_helpers.c \ - priv/guest_mips_toIR.c \ priv/host_generic_regs.c \ priv/host_generic_simd64.c \ priv/host_generic_simd128.c \ @@ -145,20 +133,8 @@ LIBVEX_SOURCES_COMMON = \ priv/host_generic_reg_alloc2.c \ priv/host_generic_reg_alloc3.c \ priv/host_x86_defs.c \ - priv/host_x86_isel.c \ - priv/host_amd64_defs.c \ - priv/host_amd64_isel.c \ - priv/host_ppc_defs.c \ - priv/host_ppc_isel.c \ - priv/host_arm_defs.c \ - priv/host_arm_isel.c \ - priv/host_arm64_defs.c \ - priv/host_arm64_isel.c \ - priv/host_s390_defs.c \ - priv/host_s390_isel.c \ - priv/s390_disasm.c \ - priv/host_mips_defs.c \ - priv/host_mips_isel.c + priv/host_x86_isel.c +# TODO-JIT: other architectures disabled for now LIBVEXMULTIARCH_SOURCES = priv/multiarch_main_main.c |
|
From: Ivo R. <ir...@so...> - 2017-08-28 22:26:11
|
The branch 'jit-hacks-2' was created pointing to: 613905c... Add some support for If-Then-Else into VEX register allocat |
|
From: Ivo R. <ir...@so...> - 2017-08-28 10:40:38
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=efa1e5ef8d257e3b20facf6f04350d29578ae9e4 commit efa1e5ef8d257e3b20facf6f04350d29578ae9e4 Author: Ivo Raisr <iv...@iv...> Date: Sat Aug 26 00:19:05 2017 +0200 VEX register allocator version 3. Implements a new version of VEX register allocator which keeps the main state per virtual registers, as opposed to real registers in v2. This results in a simpler and cleaner design and much simpler implementation. It has been observed that the new allocator executes 20-30% faster than the previous one but could produce slightly worse spilling decisions. Overall performance improvement when running the Valgrind performance regression test suite has been observed in terms of a few percent. The new register allocator (v3) is now the default one. The old register allocator (v2) is still kept around and can be activated with command line option '--vex-regalloc-version=2'. Fixes BZ#381553. Diff: --- Makefile.vex.am | 1 + NEWS | 1 + VEX/priv/host_amd64_defs.c | 43 +- VEX/priv/host_amd64_defs.h | 29 +- VEX/priv/host_arm64_defs.c | 52 +- VEX/priv/host_arm64_defs.h | 3 +- VEX/priv/host_arm_defs.c | 44 +- VEX/priv/host_arm_defs.h | 3 +- VEX/priv/host_generic_reg_alloc2.c | 179 ++---- VEX/priv/host_generic_reg_alloc3.c | 1171 ++++++++++++++++++++++++++++++++++++ VEX/priv/host_generic_regs.c | 49 +- VEX/priv/host_generic_regs.h | 108 ++-- VEX/priv/host_mips_defs.c | 39 +- VEX/priv/host_mips_defs.h | 3 +- VEX/priv/host_ppc_defs.c | 37 +- VEX/priv/host_ppc_defs.h | 3 +- VEX/priv/host_s390_defs.c | 25 +- VEX/priv/host_s390_defs.h | 3 +- VEX/priv/host_x86_defs.c | 36 +- VEX/priv/host_x86_defs.h | 4 +- VEX/priv/main_main.c | 36 +- VEX/priv/main_util.c | 37 +- VEX/pub/libvex.h | 5 + coregrind/m_main.c | 3 + none/tests/cmdline2.stdout.exp | 1 + 25 files changed, 1628 insertions(+), 287 deletions(-) diff --git a/Makefile.vex.am b/Makefile.vex.am index 9b9b9b5..4ad5ffa 100644 --- a/Makefile.vex.am +++ b/Makefile.vex.am @@ -143,6 +143,7 @@ 
LIBVEX_SOURCES_COMMON = \ priv/host_generic_simd256.c \ priv/host_generic_maddf.c \ priv/host_generic_reg_alloc2.c \ + priv/host_generic_reg_alloc3.c \ priv/host_x86_defs.c \ priv/host_x86_isel.c \ priv/host_amd64_defs.c \ diff --git a/NEWS b/NEWS index 516c4cc..446a7fa 100644 --- a/NEWS +++ b/NEWS @@ -40,6 +40,7 @@ where XXXXXX is the bug number as listed below. 381272 ppc64 doesn't compile test_isa_2_06_partx.c without VSX support 381289 epoll_pwait can have a NULL sigmask 381274 powerpc too chatty even with --sigill-diagnostics=no +381553 VEX register allocator v3 381769 Use ucontext_t instead of struct ucontext 381805 arm32 needs ld.so index hardwire for new glibc security fixes 382256 gz compiler flag test doesn't work for gold diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c index 5e0600a..ebe2b00 100644 --- a/VEX/priv/host_amd64_defs.c +++ b/VEX/priv/host_amd64_defs.c @@ -63,6 +63,7 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. 
*/ + ru->allocable_start[HRcInt64] = ru->size; ru->regs[ru->size++] = hregAMD64_RSI(); ru->regs[ru->size++] = hregAMD64_RDI(); ru->regs[ru->size++] = hregAMD64_R8(); @@ -72,6 +73,10 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) ru->regs[ru->size++] = hregAMD64_R14(); ru->regs[ru->size++] = hregAMD64_R15(); ru->regs[ru->size++] = hregAMD64_RBX(); + ru->regs[ru->size++] = hregAMD64_R10(); + ru->allocable_end[HRcInt64] = ru->size - 1; + + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregAMD64_XMM3(); ru->regs[ru->size++] = hregAMD64_XMM4(); ru->regs[ru->size++] = hregAMD64_XMM5(); @@ -82,8 +87,9 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) ru->regs[ru->size++] = hregAMD64_XMM10(); ru->regs[ru->size++] = hregAMD64_XMM11(); ru->regs[ru->size++] = hregAMD64_XMM12(); - ru->regs[ru->size++] = hregAMD64_R10(); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; + /* And other regs, not available to the allocator. */ ru->regs[ru->size++] = hregAMD64_RAX(); ru->regs[ru->size++] = hregAMD64_RCX(); @@ -101,7 +107,7 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) } -void ppHRegAMD64 ( HReg reg ) +UInt ppHRegAMD64 ( HReg reg ) { Int r; static const HChar* ireg64_names[16] @@ -109,27 +115,24 @@ void ppHRegAMD64 ( HReg reg ) "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%s", ireg64_names[r]); - return; + return vex_printf("%s", ireg64_names[r]); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%%xmm%d", r); - return; + return vex_printf("%%xmm%d", r); default: vpanic("ppHRegAMD64"); } } -static void ppHRegAMD64_lo32 ( HReg reg ) +static UInt ppHRegAMD64_lo32 ( HReg reg ) { Int r; static const HChar* ireg32_names[16] @@ -137,17 +140,16 @@ static void ppHRegAMD64_lo32 ( HReg reg ) "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - vex_printf("d"); - return; + UInt written = ppHReg(reg); + written += vex_printf("d"); + return written; } /* But specific for real regs. */ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); default: vpanic("ppHRegAMD64_lo32: invalid regclass"); } @@ -1995,6 +1997,19 @@ void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt64: + return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(from), to); + case HRcVec128: + return AMD64Instr_SseReRg(Asse_MOV, from, to); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_AMD64: unimplemented regclass"); + } +} + AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off ) { vassert(spill_off >= 0 && spill_off < 10000); /* let's say */ diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h index 39682ef..8a3eea8 100644 --- a/VEX/priv/host_amd64_defs.h +++ b/VEX/priv/host_amd64_defs.h @@ -56,19 +56,18 @@ ST_IN HReg hregAMD64_R13 ( void ) { return mkHReg(False, HRcInt64, 13, 5); } ST_IN HReg hregAMD64_R14 ( void ) { return mkHReg(False, HRcInt64, 14, 6); } 
ST_IN HReg hregAMD64_R15 ( void ) { return mkHReg(False, HRcInt64, 15, 7); } ST_IN HReg hregAMD64_RBX ( void ) { return mkHReg(False, HRcInt64, 3, 8); } - -ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 9); } -ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 10); } -ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 11); } -ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 12); } -ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 13); } -ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 14); } -ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 15); } -ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 16); } -ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 17); } -ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 18); } - -ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 19); } +ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 9); } + +ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 10); } +ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 11); } +ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 12); } +ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 13); } +ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 14); } +ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 15); } +ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 16); } +ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 17); } +ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 18); } +ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 19); } ST_IN HReg hregAMD64_RAX ( void ) { return mkHReg(False, HRcInt64, 0, 
20); } ST_IN HReg hregAMD64_RCX ( void ) { return mkHReg(False, HRcInt64, 1, 21); } @@ -81,7 +80,7 @@ ST_IN HReg hregAMD64_XMM0 ( void ) { return mkHReg(False, HRcVec128, 0, 26); } ST_IN HReg hregAMD64_XMM1 ( void ) { return mkHReg(False, HRcVec128, 1, 27); } #undef ST_IN -extern void ppHRegAMD64 ( HReg ); +extern UInt ppHRegAMD64 ( HReg ); /* --------- Condition codes, AMD encoding. --------- */ @@ -801,7 +800,7 @@ extern void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); - +extern AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool); extern AMD64Instr* directReload_AMD64 ( AMD64Instr* i, HReg vreg, Short spill_off ); diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 380a24d..2506512 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -64,7 +64,7 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. */ - + ru->allocable_start[HRcInt64] = ru->size; ru->regs[ru->size++] = hregARM64_X22(); ru->regs[ru->size++] = hregARM64_X23(); ru->regs[ru->size++] = hregARM64_X24(); @@ -81,6 +81,7 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) ru->regs[ru->size++] = hregARM64_X5(); ru->regs[ru->size++] = hregARM64_X6(); ru->regs[ru->size++] = hregARM64_X7(); + ru->allocable_end[HRcInt64] = ru->size - 1; // X8 is used as a ProfInc temporary, not available to regalloc. // X9 is a chaining/spill temporary, not available to regalloc. @@ -94,19 +95,23 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) // X21 is the guest state pointer, not available to regalloc. // vector regs. Unfortunately not callee-saved. 
+ ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregARM64_Q16(); ru->regs[ru->size++] = hregARM64_Q17(); ru->regs[ru->size++] = hregARM64_Q18(); ru->regs[ru->size++] = hregARM64_Q19(); ru->regs[ru->size++] = hregARM64_Q20(); + ru->allocable_end[HRcVec128] = ru->size - 1; // F64 regs, all of which are callee-saved + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregARM64_D8(); ru->regs[ru->size++] = hregARM64_D9(); ru->regs[ru->size++] = hregARM64_D10(); ru->regs[ru->size++] = hregARM64_D11(); ru->regs[ru->size++] = hregARM64_D12(); ru->regs[ru->size++] = hregARM64_D13(); + ru->allocable_end[HRcFlt64] = ru->size - 1; ru->allocable = ru->size; /* And other regs, not available to the allocator. */ @@ -142,43 +147,41 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) } -void ppHRegARM64 ( HReg reg ) { +UInt ppHRegARM64 ( HReg reg ) { Int r; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 31); - vex_printf("x%d", r); - return; + return vex_printf("x%d", r); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("d%d", r); - return; + return vex_printf("d%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("q%d", r); - return; + return vex_printf("q%d", r); default: vpanic("ppHRegARM64"); } } -static void ppHRegARM64asSreg ( HReg reg ) { - ppHRegARM64(reg); - vex_printf("(S-reg)"); +static UInt ppHRegARM64asSreg ( HReg reg ) { + UInt written = ppHRegARM64(reg); + written += vex_printf("(S-reg)"); + return written; } -static void ppHRegARM64asHreg ( HReg reg ) { - ppHRegARM64(reg); - vex_printf("(H-reg)"); +static UInt ppHRegARM64asHreg ( HReg reg ) { + UInt written = ppHRegARM64(reg); + written += vex_printf("(H-reg)"); + return written; } @@ -1745,7 +1748,7 @@ void ppARM64Instr ( const ARM64Instr* i ) { ppHRegARM64asSreg(i->ARM64in.VCmpS.argR); return; case ARM64in_VFCSel: { - void (*ppHRegARM64fp)(HReg) + UInt (*ppHRegARM64fp)(HReg) = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg); vex_printf("fcsel "); ppHRegARM64fp(i->ARM64in.VFCSel.dst); @@ -2616,6 +2619,21 @@ void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt64: + return ARM64Instr_MovI(to, from); + case HRcFlt64: + return ARM64Instr_VMov(8, to, from); + case HRcVec128: + return ARM64Instr_VMov(16, to, from); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_ARM64: unimplemented regclass"); + } +} + /* Emit an instruction into buf and return the number of bytes used. 
Note that buf is not the insn's final place, and therefore it is diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index 14b2de6..e7da4f9 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -74,7 +74,7 @@ ST_IN HReg hregARM64_X9 ( void ) { return mkHReg(False, HRcInt64, 9, 27); } ST_IN HReg hregARM64_X21 ( void ) { return mkHReg(False, HRcInt64, 21, 28); } #undef ST_IN -extern void ppHRegARM64 ( HReg ); +extern UInt ppHRegARM64 ( HReg ); /* Number of registers used arg passing in function calls */ #define ARM64_N_ARGREGS 8 /* x0 .. x7 */ @@ -1007,6 +1007,7 @@ extern void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); +extern ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool); extern const RRegUniverse* getRRegUniverse_ARM64 ( void ); diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c index a986f37..9bf87cd 100644 --- a/VEX/priv/host_arm_defs.c +++ b/VEX/priv/host_arm_defs.c @@ -68,6 +68,7 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) /* Callee saves ones are listed first, since we prefer them if they're available. */ + ru->allocable_start[HRcInt32] = ru->size; ru->regs[ru->size++] = hregARM_R4(); ru->regs[ru->size++] = hregARM_R5(); ru->regs[ru->size++] = hregARM_R6(); @@ -80,24 +81,34 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) ru->regs[ru->size++] = hregARM_R2(); ru->regs[ru->size++] = hregARM_R3(); ru->regs[ru->size++] = hregARM_R9(); + ru->allocable_end[HRcInt32] = ru->size - 1; + /* FP registers. Note: these are all callee-save. Yay! Hence we don't need to mention them as trashed in getHRegUsage for ARMInstr_Call. 
*/ + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregARM_D8(); ru->regs[ru->size++] = hregARM_D9(); ru->regs[ru->size++] = hregARM_D10(); ru->regs[ru->size++] = hregARM_D11(); ru->regs[ru->size++] = hregARM_D12(); + ru->allocable_end[HRcFlt64] = ru->size - 1; + + ru->allocable_start[HRcFlt32] = ru->size; ru->regs[ru->size++] = hregARM_S26(); ru->regs[ru->size++] = hregARM_S27(); ru->regs[ru->size++] = hregARM_S28(); ru->regs[ru->size++] = hregARM_S29(); ru->regs[ru->size++] = hregARM_S30(); + ru->allocable_end[HRcFlt32] = ru->size - 1; + + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregARM_Q8(); ru->regs[ru->size++] = hregARM_Q9(); ru->regs[ru->size++] = hregARM_Q10(); ru->regs[ru->size++] = hregARM_Q11(); ru->regs[ru->size++] = hregARM_Q12(); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; /* And other regs, not available to the allocator. */ @@ -140,35 +151,30 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) } -void ppHRegARM ( HReg reg ) { +UInt ppHRegARM ( HReg reg ) { Int r; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ switch (hregClass(reg)) { case HRcInt32: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("r%d", r); - return; + return vex_printf("r%d", r); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("d%d", r); - return; + return vex_printf("d%d", r); case HRcFlt32: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("s%d", r); - return; + return vex_printf("s%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("q%d", r); - return; + return vex_printf("q%d", r); default: vpanic("ppHRegARM"); } @@ -2772,6 +2778,22 @@ void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +ARMInstr* genMove_ARM(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt32: + return ARMInstr_Mov(to, ARMRI84_R(from)); + case HRcFlt32: + return ARMInstr_VUnaryS(ARMvfpu_COPY, to, from); + case HRcFlt64: + return ARMInstr_VUnaryD(ARMvfpu_COPY, to, from); + case HRcVec128: + return ARMInstr_NUnary(ARMneon_COPY, to, from, 4, False); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_ARM: unimplemented regclass"); + } +} /* Emit an instruction into buf and return the number of bytes used. 
Note that buf is not the insn's final place, and therefore it is diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h index e8a2eb7..56c4ec5 100644 --- a/VEX/priv/host_arm_defs.h +++ b/VEX/priv/host_arm_defs.h @@ -81,7 +81,7 @@ ST_IN HReg hregARM_Q14 ( void ) { return mkHReg(False, HRcVec128, 14, 32); } ST_IN HReg hregARM_Q15 ( void ) { return mkHReg(False, HRcVec128, 15, 33); } #undef ST_IN -extern void ppHRegARM ( HReg ); +extern UInt ppHRegARM ( HReg ); /* Number of registers used arg passing in function calls */ #define ARM_N_ARGREGS 4 /* r0, r1, r2, r3 */ @@ -1070,6 +1070,7 @@ extern void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); +extern ARMInstr* genMove_ARM(HReg from, HReg to, Bool); extern const RRegUniverse* getRRegUniverse_ARM ( void ); diff --git a/VEX/priv/host_generic_reg_alloc2.c b/VEX/priv/host_generic_reg_alloc2.c index 2294a9b..ec291d3 100644 --- a/VEX/priv/host_generic_reg_alloc2.c +++ b/VEX/priv/host_generic_reg_alloc2.c @@ -294,49 +294,6 @@ static inline UInt ULong__minIndex ( ULong w64 ) { } -/* Vectorised memset, copied from Valgrind's m_libcbase.c. */ -static void* local_memset ( void *destV, Int c, SizeT sz ) -{ -# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3))) - - UInt c4; - UChar* d = destV; - UChar uc = c; - - while ((!IS_4_ALIGNED(d)) && sz >= 1) { - d[0] = uc; - d++; - sz--; - } - if (sz == 0) - return destV; - c4 = uc; - c4 |= (c4 << 8); - c4 |= (c4 << 16); - while (sz >= 16) { - ((UInt*)d)[0] = c4; - ((UInt*)d)[1] = c4; - ((UInt*)d)[2] = c4; - ((UInt*)d)[3] = c4; - d += 16; - sz -= 16; - } - while (sz >= 4) { - ((UInt*)d)[0] = c4; - d += 4; - sz -= 4; - } - while (sz >= 1) { - d[0] = c; - d++; - sz--; - } - return destV; - -# undef IS_4_ALIGNED -} - - /* A target-independent register allocator. 
Requires various functions which it uses to deal abstractly with instructions and registers, since it cannot have any target-specific knowledge. @@ -352,44 +309,13 @@ static void* local_memset ( void *destV, Int c, SizeT sz ) Takes an expandable array of pointers to unallocated insns. Returns an expandable array of pointers to allocated insns. */ -HInstrArray* doRegisterAllocation ( +HInstrArray* doRegisterAllocation_v2 ( /* Incoming virtual-registerised code. */ HInstrArray* instrs_in, - /* The real-register universe to use. This contains facts about - real registers, one of which is the set of registers available - for allocation. */ - const RRegUniverse* univ, - - /* Return True iff the given insn is a reg-reg move, in which - case also return the src and dst regs. */ - Bool (*isMove) ( const HInstr*, HReg*, HReg* ), - - /* Get info about register usage in this insn. */ - void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ), - - /* Apply a reg-reg mapping to an insn. */ - void (*mapRegs) ( HRegRemap*, HInstr*, Bool ), - - /* Return one, or, if we're unlucky, two insn(s) to spill/restore a - real reg to a spill slot byte offset. The two leading HInstr** - args are out parameters, through which the generated insns are - returned. Also (optionally) a 'directReload' function, which - attempts to replace a given instruction by one which reads - directly from a specified spill slot. May be NULL, in which - case the optimisation is not attempted. */ - void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ), - void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ), - HInstr* (*directReload) ( HInstr*, HReg, Short ), - Int guest_sizeB, - - /* For debug printing only. */ - void (*ppInstr) ( const HInstr*, Bool ), - void (*ppReg) ( HReg ), - - /* 32/64bit mode */ - Bool mode64 + /* Register allocator controls to use. 
*/ + const RegAllocControl* con ) { # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) @@ -447,7 +373,7 @@ HInstrArray* doRegisterAllocation ( not at each insn processed. */ Bool do_sanity_check; - vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN)); + vassert(0 == (con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN)); vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN)); vassert(0 == (N_SPILL64S % 2)); @@ -463,7 +389,7 @@ HInstrArray* doRegisterAllocation ( HInstr* _tmp = (_instr); \ if (DEBUG_REGALLOC) { \ vex_printf("** "); \ - (*ppInstr)(_tmp, mode64); \ + con->ppInstr(_tmp, con->mode64); \ vex_printf("\n\n"); \ } \ addHInstr ( instrs_out, _tmp ); \ @@ -474,13 +400,13 @@ HInstrArray* doRegisterAllocation ( Int z, q; \ for (z = 0; z < n_rregs; z++) { \ vex_printf(" rreg_state[%2d] = ", z); \ - (*ppReg)(univ->regs[z]); \ + con->ppReg(con->univ->regs[z]); \ vex_printf(" \t"); \ switch (rreg_state[z].disp) { \ case Free: vex_printf("Free\n"); break; \ case Unavail: vex_printf("Unavail\n"); break; \ case Bound: vex_printf("BoundTo "); \ - (*ppReg)(rreg_state[z].vreg); \ + con->ppReg(rreg_state[z].vreg); \ vex_printf("\n"); break; \ } \ } \ @@ -505,7 +431,7 @@ HInstrArray* doRegisterAllocation ( /* ... and initialise running state. */ /* n_rregs is no more than a short name for n_available_real_regs. */ - n_rregs = univ->allocable; + n_rregs = con->univ->allocable; n_vregs = instrs_in->n_vregs; /* If this is not so, vreg_state entries will overflow. 
*/ @@ -586,13 +512,13 @@ HInstrArray* doRegisterAllocation ( for (Int ii = 0; ii < instrs_in->arr_used; ii++) { - (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 ); + con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii], con->mode64); if (0) { vex_printf("\n%d stage1: ", ii); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n"); - ppHRegUsage(univ, ®_usage_arr[ii]); + ppHRegUsage(con->univ, ®_usage_arr[ii]); } /* ------ start of DEAL WITH VREG LIVE RANGES ------ */ @@ -606,7 +532,7 @@ HInstrArray* doRegisterAllocation ( Int k = hregIndex(vreg); if (k < 0 || k >= n_vregs) { vex_printf("\n"); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n"); vex_printf("vreg %d, n_vregs %d\n", k, n_vregs); vpanic("doRegisterAllocation: out-of-range vreg"); @@ -711,10 +637,10 @@ HInstrArray* doRegisterAllocation ( } else if (!isW && isR) { if (rreg_live_after[j] == INVALID_INSTRNO) { vex_printf("\nOFFENDING RREG = "); - (*ppReg)(univ->regs[j]); + con->ppReg(con->univ->regs[j]); vex_printf("\n"); vex_printf("\nOFFENDING instr = "); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n"); vpanic("doRegisterAllocation: " "first event for rreg is Read"); @@ -724,10 +650,10 @@ HInstrArray* doRegisterAllocation ( vassert(isR && isW); if (rreg_live_after[j] == INVALID_INSTRNO) { vex_printf("\nOFFENDING RREG = "); - (*ppReg)(univ->regs[j]); + con->ppReg(con->univ->regs[j]); vex_printf("\n"); vex_printf("\nOFFENDING instr = "); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n"); vpanic("doRegisterAllocation: " "first event for rreg is Modify"); @@ -741,7 +667,7 @@ HInstrArray* doRegisterAllocation ( ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); if (0) vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db); - rreg_lrs_la[rreg_lrs_used].rreg = 
univ->regs[j]; + rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j]; rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la); rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db); rreg_lrs_used++; @@ -778,7 +704,7 @@ HInstrArray* doRegisterAllocation ( if (0) vex_printf("FLUSH 2 (%d,%d)\n", rreg_live_after[j], rreg_dead_before[j]); - rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j]; + rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j]; rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]); rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]); rreg_lrs_used++; @@ -805,7 +731,7 @@ HInstrArray* doRegisterAllocation ( for (Int j = 0; j < n_rregs; j++) { if (!rreg_state[j].has_hlrs) continue; - ppReg(univ->regs[j]); + con->ppReg(con->univ->regs[j]); vex_printf(" hinted\n"); } } @@ -841,14 +767,14 @@ HInstrArray* doRegisterAllocation ( vex_printf("RRegLRs by LA:\n"); for (Int j = 0; j < rreg_lrs_used; j++) { vex_printf(" "); - (*ppReg)(rreg_lrs_la[j].rreg); + con->ppReg(rreg_lrs_la[j].rreg); vex_printf(" la = %d, db = %d\n", rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before ); } vex_printf("RRegLRs by DB:\n"); for (Int j = 0; j < rreg_lrs_used; j++) { vex_printf(" "); - (*ppReg)(rreg_lrs_db[j].rreg); + con->ppReg(rreg_lrs_db[j].rreg); vex_printf(" la = %d, db = %d\n", rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before ); } @@ -882,7 +808,7 @@ HInstrArray* doRegisterAllocation ( */ /* Int max_ss_no = -1; */ - local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before)); + vex_bzero(ss_busy_until_before, sizeof(ss_busy_until_before)); for (Int j = 0; j < n_vregs; j++) { @@ -940,7 +866,7 @@ HInstrArray* doRegisterAllocation ( /* This reflects LibVEX's hard-wired knowledge of the baseBlock layout: the guest state, then two equal sized areas following it for two sets of shadow state, and then the spill area. 
*/ - vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8); + vreg_lrs[j].spill_offset = toShort(con->guest_sizeB * 3 + ss_no * 8); /* Independent check that we've made a sane choice of slot */ sanity_check_spill_offset( &vreg_lrs[j] ); @@ -983,7 +909,7 @@ HInstrArray* doRegisterAllocation ( if (DEBUG_REGALLOC) { vex_printf("\n====----====---- Insn %d ----====----====\n", ii); vex_printf("---- "); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n\nInitial state:\n"); PRINT_STATE; vex_printf("\n"); @@ -1018,7 +944,7 @@ HInstrArray* doRegisterAllocation ( vex_printf("considering la %d .. db %d reg = ", rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before); - (*ppReg)(reg); + con->ppReg(reg); vex_printf("\n"); } @@ -1059,7 +985,7 @@ HInstrArray* doRegisterAllocation ( vassert(rreg_state[j].eq_spill_slot == False); continue; } - vassert(hregClass(univ->regs[j]) + vassert(hregClass(con->univ->regs[j]) == hregClass(rreg_state[j].vreg)); vassert( hregIsVirtual(rreg_state[j].vreg)); } @@ -1099,7 +1025,7 @@ HInstrArray* doRegisterAllocation ( the dst to the src's rreg, and that's all. */ HReg vregS = INVALID_HREG; HReg vregD = INVALID_HREG; - if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) { + if ( con->isMove(instrs_in->arr[ii], &vregS, &vregD) ) { if (!hregIsVirtual(vregS)) goto cannot_coalesce; if (!hregIsVirtual(vregD)) goto cannot_coalesce; /* Check that *isMove is not telling us a bunch of lies ... */ @@ -1112,9 +1038,9 @@ HInstrArray* doRegisterAllocation ( if (vreg_lrs[m].live_after != ii) goto cannot_coalesce; if (DEBUG_REGALLOC) { vex_printf("COALESCE "); - (*ppReg)(vregS); + con->ppReg(vregS); vex_printf(" -> "); - (*ppReg)(vregD); + con->ppReg(vregD); vex_printf("\n\n"); } /* Find the state entry for vregS. 
*/ @@ -1163,7 +1089,7 @@ HInstrArray* doRegisterAllocation ( vreg_state[m] = INVALID_RREG_NO; if (DEBUG_REGALLOC) { vex_printf("free up "); - (*ppReg)(univ->regs[j]); + con->ppReg(con->univ->regs[j]); vex_printf("\n"); } } @@ -1204,7 +1130,7 @@ HInstrArray* doRegisterAllocation ( than before it. */ if (DEBUG_REGALLOC) { vex_printf("need to free up rreg: "); - (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg); + con->ppReg(rreg_lrs_la[rreg_lrs_la_next].rreg); vex_printf("\n\n"); } Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg); @@ -1223,8 +1149,8 @@ HInstrArray* doRegisterAllocation ( if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) { HInstr* spill1 = NULL; HInstr* spill2 = NULL; - (*genSpill)( &spill1, &spill2, univ->regs[k], - vreg_lrs[m].spill_offset, mode64 ); + con->genSpill(&spill1, &spill2, con->univ->regs[k], + vreg_lrs[m].spill_offset, con->mode64); vassert(spill1 || spill2); /* can't both be NULL */ if (spill1) EMIT_INSTR(spill1); @@ -1271,7 +1197,7 @@ HInstrArray* doRegisterAllocation ( that the change is invisible to the standard-case handling that follows. */ - if (directReload && reg_usage_arr[ii].n_vRegs <= 2) { + if (con->directReload != NULL && reg_usage_arr[ii].n_vRegs <= 2) { Bool debug_direct_reload = False; HReg cand = INVALID_HREG; Bool nreads = 0; @@ -1305,19 +1231,20 @@ HInstrArray* doRegisterAllocation ( vassert(! sameHReg(reg_usage_arr[ii].vRegs[0], reg_usage_arr[ii].vRegs[1])); - reloaded = directReload ( instrs_in->arr[ii], cand, spilloff ); + reloaded = con->directReload(instrs_in->arr[ii], cand, spilloff); if (debug_direct_reload && !reloaded) { vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" "); - ppInstr(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); } if (reloaded) { /* Update info about the insn, so it looks as if it had been in this form all along. 
*/ instrs_in->arr[ii] = reloaded; - (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 ); + con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii], + con->mode64); if (debug_direct_reload && !reloaded) { vex_printf(" --> "); - ppInstr(reloaded, mode64); + con->ppInstr(reloaded, con->mode64); } } @@ -1336,7 +1263,7 @@ HInstrArray* doRegisterAllocation ( vassert(hregIsVirtual(vreg)); if (0) { - vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n"); + vex_printf("considering "); con->ppReg(vreg); vex_printf("\n"); } /* Now we're trying to find a rreg for "vreg". First of all, @@ -1347,7 +1274,7 @@ HInstrArray* doRegisterAllocation ( Int n = vreg_state[m]; if (IS_VALID_RREGNO(n)) { vassert(rreg_state[n].disp == Bound); - addToHRegRemap(&remap, vreg, univ->regs[n]); + addToHRegRemap(&remap, vreg, con->univ->regs[n]); /* If this rreg is written or modified, mark it as different from any spill slot value. */ if (reg_usage_arr[ii].vMode[j] != HRmRead) @@ -1366,7 +1293,7 @@ HInstrArray* doRegisterAllocation ( Int k; for (k = 0; k < n_rregs; k++) { if (rreg_state[k].disp != Free - || hregClass(univ->regs[k]) != hregClass(vreg)) + || hregClass(con->univ->regs[k]) != hregClass(vreg)) continue; if (rreg_state[k].has_hlrs) { /* Well, at least we can use k_suboptimal if we really @@ -1387,7 +1314,7 @@ HInstrArray* doRegisterAllocation ( Int p = hregIndex(vreg); vassert(IS_VALID_VREGNO(p)); vreg_state[p] = toShort(k); - addToHRegRemap(&remap, vreg, univ->regs[k]); + addToHRegRemap(&remap, vreg, con->univ->regs[k]); /* Generate a reload if needed. This only creates needed reloads because the live range builder for vregs will guarantee that the first event for a vreg is a write. 
@@ -1398,8 +1325,8 @@ HInstrArray* doRegisterAllocation ( vassert(vreg_lrs[p].reg_class != HRcINVALID); HInstr* reload1 = NULL; HInstr* reload2 = NULL; - (*genReload)( &reload1, &reload2, univ->regs[k], - vreg_lrs[p].spill_offset, mode64 ); + con->genReload(&reload1, &reload2, con->univ->regs[k], + vreg_lrs[p].spill_offset, con->mode64); vassert(reload1 || reload2); /* can't both be NULL */ if (reload1) EMIT_INSTR(reload1); @@ -1433,7 +1360,7 @@ HInstrArray* doRegisterAllocation ( rreg_state[k].is_spill_cand = False; if (rreg_state[k].disp != Bound) continue; - if (hregClass(univ->regs[k]) != hregClass(vreg)) + if (hregClass(con->univ->regs[k]) != hregClass(vreg)) continue; rreg_state[k].is_spill_cand = True; /* Note, the following loop visits only the virtual regs @@ -1468,7 +1395,7 @@ HInstrArray* doRegisterAllocation ( vassert(IS_VALID_RREGNO(spillee)); vassert(rreg_state[spillee].disp == Bound); /* check it's the right class */ - vassert(hregClass(univ->regs[spillee]) == hregClass(vreg)); + vassert(hregClass(con->univ->regs[spillee]) == hregClass(vreg)); /* check we're not ejecting the vreg for which we are trying to free up a register. */ vassert(! 
sameHReg(rreg_state[spillee].vreg, vreg)); @@ -1483,8 +1410,8 @@ HInstrArray* doRegisterAllocation ( if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) { HInstr* spill1 = NULL; HInstr* spill2 = NULL; - (*genSpill)( &spill1, &spill2, univ->regs[spillee], - vreg_lrs[m].spill_offset, mode64 ); + con->genSpill(&spill1, &spill2, con->univ->regs[spillee], + vreg_lrs[m].spill_offset, con->mode64); vassert(spill1 || spill2); /* can't both be NULL */ if (spill1) EMIT_INSTR(spill1); @@ -1509,8 +1436,8 @@ HInstrArray* doRegisterAllocation ( vassert(vreg_lrs[m].reg_class != HRcINVALID); HInstr* reload1 = NULL; HInstr* reload2 = NULL; - (*genReload)( &reload1, &reload2, univ->regs[spillee], - vreg_lrs[m].spill_offset, mode64 ); + con->genReload(&reload1, &reload2, con->univ->regs[spillee], + vreg_lrs[m].spill_offset, con->mode64); vassert(reload1 || reload2); /* can't both be NULL */ if (reload1) EMIT_INSTR(reload1); @@ -1529,7 +1456,7 @@ HInstrArray* doRegisterAllocation ( /* So after much twisting and turning, we have vreg mapped to rreg_state[spillee].rreg. Note that in the map. */ - addToHRegRemap(&remap, vreg, univ->regs[spillee]); + addToHRegRemap(&remap, vreg, con->univ->regs[spillee]); } /* iterate over virtual registers in this instruction. */ @@ -1545,7 +1472,7 @@ HInstrArray* doRegisterAllocation ( */ /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. 
*/ - (*mapRegs)( &remap, instrs_in->arr[ii], mode64 ); + con->mapRegs(&remap, instrs_in->arr[ii], con->mode64); EMIT_INSTR( instrs_in->arr[ii] ); if (DEBUG_REGALLOC) { diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c new file mode 100644 index 0000000..f798372 --- /dev/null +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -0,0 +1,1171 @@ +/*----------------------------------------------------------------------------*/ +/*--- begin host_generic_reg_alloc3.c ---*/ +/*----------------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation framework. + + Copyright (C) 2017-2017 Ivo Raisr + iv...@iv... + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "libvex_basictypes.h" +#include "libvex.h" + +#include "main_util.h" +#include "host_generic_regs.h" + +/* Set to 1 for lots of debugging output. */ +#define DEBUG_REGALLOC 0 + +/* Set to 1 for sanity checking at every instruction. + Set to 0 for sanity checking only every 17th one and the last one. */ +#define SANITY_CHECKS_EVERY_INSTR 0 + + +#define INVALID_INSTRNO (-2) + +/* Register allocator state is kept in an array of VRegState's. 
+ There is an element for every virtual register (vreg). + Elements are indexed [0 .. n_vregs-1]. + Records information about vreg live range and its state. */ +typedef + struct { + /* Live range, register class and spill offset are computed during the + first register allocator pass and remain unchanged after that. */ + + /* This vreg becomes live with this instruction (inclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short live_after; + /* This vreg becomes dead before this instruction (exclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short dead_before; + /* What kind of register this is. */ + HRegClass reg_class; + + /* What is its current disposition? */ + enum { Unallocated, /* Neither spilled nor assigned to a real reg. */ + Assigned, /* Assigned to a real register, viz rreg. */ + Spilled /* Spilled to the spill slot. */ + } disp; + + /* If .disp == Assigned, what rreg is it bound to? */ + HReg rreg; + + /* The "home" spill slot. The offset is relative to the beginning of + the guest state. */ + UShort spill_offset; + } + VRegState; + +/* The allocator also maintains a redundant array of indexes (rreg_state) from + rreg numbers back to entries in vreg_state. It is redundant because iff + rreg_state[r] == v then hregNumber(vreg_state[v].rreg) == r -- that is, the + two entries point at each other. The purpose of this is to speed up + activities which involve looking for a particular rreg: there is no need to + scan the vreg_state looking for it, just index directly into rreg_state. + The FAQ "does this rreg already have an associated vreg" is the main + beneficiary. + The identity of the real register is not recorded here, because the index + of this structure in |rreg_state| is the index number of the register, and + the register itself can be extracted from the RRegUniverse (univ). */ +typedef + struct { + /* What is its current disposition? */ + enum { Free, /* Not bound to any vreg. 
*/ + Bound, /* Bound to a vreg, viz vreg. */ + Reserved /* Reserved for an instruction. */ + } disp; + + /* If .disp == Bound, what vreg is it bound to? */ + HReg vreg; + } + RRegState; + +/* Records information on a real-register live range, associated with + a particular real register. Computed once; does not change. */ +typedef + struct { + /* This rreg becomes live with this instruction (inclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short live_after; + /* This rreg becomes dead before this instruction (exclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short dead_before; + } + RRegLR; + +/* Live ranges for a single rreg and the current one. + Live ranges are computed during the first register allocator pass and remain + unchanged after that. + The identity of the real register is not recorded here, because the index + of this structure in |rreg_lr_state| is the index number of the register, and + the register itself can be extracted from the RRegUniverse (univ). */ +typedef + struct { + RRegLR* lrs; + UInt lrs_size; + UInt lrs_used; + + /* Live range corresponding to the currently processed instruction. + Points into |lrs| array. */ + RRegLR *lr_current; + UInt lr_current_idx; + } + RRegLRState; + +#define IS_VALID_VREGNO(v) ((v) >= 0 && (v) < n_vregs) +#define IS_VALID_RREGNO(r) ((r) >= 0 && (r) < n_rregs) + +/* Compute the index of the highest and lowest 1 in a ULong, respectively. + Results are undefined if the argument is zero. 
Don't pass it zero :) */ +static inline UInt ULong__maxIndex ( ULong w64 ) { + return 63 - __builtin_clzll(w64); +} + +static inline UInt ULong__minIndex ( ULong w64 ) { + return __builtin_ctzll(w64); +} + +static inline void enlarge_rreg_lrs(RRegLRState* rreg_lrs) +{ + vassert(rreg_lrs->lrs_used == rreg_lrs->lrs_size); + + RRegLR* lr2 = LibVEX_Alloc_inline(2 * rreg_lrs->lrs_used * sizeof(RRegLR)); + for (UInt l = 0; l < rreg_lrs->lrs_used; l++) { + lr2[l] = rreg_lrs->lrs[l]; + } + + rreg_lrs->lrs = lr2; + rreg_lrs->lrs_size = 2 * rreg_lrs->lrs_used; +} + +static inline void print_state( + const RegAllocControl* con, + const VRegState* vreg_state, UInt n_vregs, + const RRegState* rreg_state, UInt n_rregs, + const RRegLRState* rreg_lr_state, + UShort current_ii) +{ + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { + const VRegState* vreg = &vreg_state[v_idx]; + + if (vreg->live_after == INVALID_INSTRNO) { + continue; /* This is a dead vreg. Never comes into live. */ + } + vex_printf("vreg_state[%3u] \t", v_idx); + + UInt written; + switch (vreg->disp) { + case Unallocated: + written = vex_printf("unallocated"); + break; + case Assigned: + written = vex_printf("assigned to "); + written += con->ppReg(vreg->rreg); + break; + case Spilled: + written = vex_printf("spilled at offset %u", vreg->spill_offset); + break; + default: + vassert(0); + } + + for (Int w = 30 - written; w > 0; w--) { + vex_printf(" "); + } + + if (vreg->live_after > (Short) current_ii) { + vex_printf("[not live yet]\n"); + } else if ((Short) current_ii >= vreg->dead_before) { + vex_printf("[now dead]\n"); + } else { + vex_printf("[live]\n"); + } + } + + for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + const RRegState* rreg = &rreg_state[r_idx]; + const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + vex_printf("rreg_state[%2u] = ", r_idx); + UInt written = con->ppReg(con->univ->regs[r_idx]); + for (Int w = 10 - written; w > 0; w--) { + vex_printf(" "); + } + + switch (rreg->disp) { + case Free: 
+ vex_printf("free\n"); + break; + case Bound: + vex_printf("bound for "); + con->ppReg(rreg->vreg); + vex_printf("\n"); + break; + case Reserved: + vex_printf("reserved - live range [%d, %d)\n", + rreg_lrs->lr_current->live_after, + rreg_lrs->lr_current->dead_before); + break; + } + } +} + +static inline void emit_instr(HInstr* instr, HInstrArray* instrs_out, + const RegAllocControl* con, const HChar* why) +{ + if (DEBUG_REGALLOC) { + vex_printf("** "); + con->ppInstr(instr, con->mode64); + if (why != NULL) { + vex_printf(" (%s)", why); + } + vex_printf("\n\n"); + } + + addHInstr(instrs_out, instr); +} + +/* Spills a vreg assigned to some rreg. + The vreg is spilled and the rreg is freed. + Returns rreg's index. */ +static inline UInt spill_vreg( + HReg vreg, UInt v_idx, UInt current_ii, VRegState* vreg_state, UInt n_vregs, + RRegState* rreg_state, UInt n_rregs, HInstrArray* instrs_out, + const RegAllocControl* con) +{ + /* Check some invariants first. */ + vassert(IS_VALID_VREGNO((v_idx))); + vassert(vreg_state[v_idx].disp == Assigned); + HReg rreg = vreg_state[v_idx].rreg; + UInt r_idx = hregIndex(rreg); + vassert(IS_VALID_RREGNO(r_idx)); + vassert(hregClass(con->univ->regs[r_idx]) == hregClass(vreg)); + vassert(vreg_state[v_idx].dead_before > (Short) current_ii); + vassert(vreg_state[v_idx].reg_class != HRcINVALID); + + /* Generate spill. */ + HInstr* spill1 = NULL; + HInstr* spill2 = NULL; + con->genSpill(&spill1, &spill2, rreg, vreg_state[v_idx].spill_offset, + con->mode64); + vassert(spill1 != NULL || spill2 != NULL); /* cannot be both NULL */ + if (spill1 != NULL) { + emit_instr(spill1, instrs_out, con, "spill1"); + } + if (spill2 != NULL) { + emit_instr(spill2, instrs_out, con, "spill2"); + } + + /* Update register allocator state. 
*/ + vreg_state[v_idx].disp = Spilled; + vreg_state[v_idx].rreg = INVALID_HREG; + rreg_state[r_idx].disp = Free; + rreg_state[r_idx].vreg = INVALID_HREG; + + return r_idx; +} + +/* Chooses a vreg to be spilled based on various criteria. + The vreg must not be from the instruction being processed, that is, it must + not be listed in reg_usage->vRegs. */ +static inline HReg find_vreg_to_spill( + VRegState* vreg_state, UInt n_vregs, + RRegState* rreg_state, UInt n_rregs, + const HRegUsage* instr_regusage, HRegClass target_hregclass, + const HRegUsage* reg_usage, UInt scan_forward_from, UInt scan_forward_max, + const RegAllocControl* con) +{ + /* Scan forwards a few instructions to find the most distant mentioned + use of a vreg. We can scan in the range of (inclusive): + - reg_usage[scan_forward_from] + - reg_usage[scan_forward_end], where scan_forward_end + = MIN(scan_forward_max, scan_forward_from + FEW_INSTRUCTIONS). */ +# define FEW_INSTRUCTIONS 5 + UInt scan_forward_end + = (scan_forward_max <= scan_forward_from + FEW_INSTRUCTIONS) ? + scan_forward_max : scan_forward_from + FEW_INSTRUCTIONS; +# undef FEW_INSTRUCTIONS + + HReg vreg_found = INVALID_HREG; + UInt distance_so_far = 0; + + for (UInt r_idx = con->univ->allocable_start[target_hregclass]; + r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) { + if (rreg_state[r_idx].disp == Bound) { + HReg vreg = rreg_state[r_idx].vreg; + if (! HRegUsage__contains(instr_regusage, vreg)) { + UInt ii = scan_forward_from; + for ( ; ii <= scan_forward_end; ii++) { + if (HRegUsage__contains(&reg_usage[ii], vreg)) { + break; + } + } + + if (ii - scan_forward_from > distance_so_far) { + distance_so_far = ii - scan_forward_from; + vreg_found = vreg; + if (ii + distance_so_far == scan_forward_end) { + break; /* We are at the end. Nothing could be better. 
*/ + } + } + } + } + } + + if (hregIsInvalid(vreg_found)) { + vex_printf("doRegisterAllocation_v3: cannot find a register in class: "); + ppHRegClass(target_hregclass); + vex_printf("\n"); + vpanic("doRegisterAllocation_v3: cannot find a register."); + } + + return vreg_found; +} + +/* Find a free rreg of the correct class. + Tries to find an rreg whose live range (if any) is as far ahead in the + incoming instruction stream as possible. An ideal rreg candidate is + a callee-save register because it won't be used for parameter passing + around helper function calls. */ +static Bool find_free_rreg( + VRegState* vreg_state, UInt n_vregs, + RRegState* rreg_state, UInt n_rregs, + const RRegLRState* rreg_lr_state, + UInt current_ii, HRegClass target_hregclass, + Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found) +{ + Bool found = False; + UInt distance_so_far = 0; /* running max for |live_after - current_ii| */ + + for (UInt r_idx = con->univ->allocable_start[target_hregclass]; + r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) { + const RRegState* rreg = &rreg_state[r_idx]; + const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + if (rreg->disp == Free) { + if (rreg_lrs->lrs_used == 0) { + found = True; + *r_idx_found = r_idx; + break; /* There could be nothing better, so break now. */ + } else { + const RRegLR* lr = rreg_lrs->lr_current; + if (lr->live_after > (Short) current_ii) { + /* Not live, yet. */ + if ((lr->live_after - (Short) current_ii) > distance_so_far) { + distance_so_far = lr->live_after - (Short) current_ii; + found = True; + *r_idx_found = r_idx; + } + } else if ((Short) current_ii >= lr->dead_before) { + /* Now dead. Effectively as if there is no LR now. */ + found = True; + *r_idx_found = r_idx; + break; /* There could be nothing better, so break now. */ + } else { + /* Going live for this instruction. This could happen only when + rregs are being reserved en mass, for example before + a helper call. 
*/ + vassert(reserve_phase); + } + } + } + } + + return found; +} + +/* A target-independent register allocator (v3). Requires various functions + which it uses to deal abstractly with instructions and registers, since it + cannot have any target-specific knowledge. + + Returns a new list of instructions, which, as a result of the behaviour of + mapRegs, will be in-place modifications of the original instructions. + + Requires that the incoming code has been generated using vreg numbers + 0, 1 .. n_vregs-1. Appearance of a vreg outside that range is a checked + run-time error. + + Takes unallocated instructions and returns allocated instructions. +*/ +HInstrArray* doRegisterAllocation_v3( + /* Incoming virtual-registerised code. */ + HInstrArray* instrs_in, + + /* Register allocator controls to use. */ + const RegAllocControl* con +) +{ + vassert((con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN) == 0); + + /* The main register allocator state. */ + UInt n_vregs = instrs_in->n_vregs; + VRegState* vreg_state = NULL; + if (n_vregs > 0) { + vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(VRegState)); + } + + /* If this is not so, the universe we have is nonsensical. */ + UInt n_rregs = con->univ->allocable; + vassert(n_rregs > 0); + STATIC_ASSERT(N_RREGUNIVERSE_REGS == 64); + + /* Redundant rreg -> vreg state. */ + RRegState* rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState)); + + /* Info on rreg live ranges. */ + RRegLRState* rreg_lr_state + = LibVEX_Alloc_inline(n_rregs * sizeof(RRegLRState)); + + /* Info on register usage in the incoming instruction array. Computed once + and remains unchanged, more or less; updated sometimes by the + direct-reload optimisation. */ + HRegUsage* reg_usage + = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used); + + /* The live range numbers are signed shorts, and so limiting the + number of instructions to 15000 comfortably guards against them + overflowing 32k. 
*/ + vassert(instrs_in->arr_used <= 15000); + + /* The output array of instructions. */ + HInstrArray* instrs_out = newHInstrArray(); + + +# define OFFENDING_VREG(_v_idx, _instr, _mode) \ + do { \ + vex_printf("\n\nOffending vreg = %u\n", (_v_idx)); \ + vex_printf("\nOffending instruction = "); \ + con->ppInstr((_instr), con->mode64); \ + vex_printf("\n"); \ + vpanic("doRegisterAllocation_v3: first event for vreg is "#_mode \ + " (should be Write)"); \ + } while (0) + +# define OFFENDING_RREG(_r_idx, _instr, _mode) \ + do { \ + vex_printf("\n\nOffending rreg = "); \ + con->ppReg(con->univ->regs[(_r_idx)]); \ + vex_printf("\nOffending instruction = "); \ + con->ppInstr((_instr), con->mode64); \ + vex_printf("\n"); \ + vpanic("doRegisterAllocation_v3: first event for rreg is "#_mode \ + " (should be Write)"); \ + } while (0) + + +/* Finds an rreg of the correct class. + If a free rreg is not found, then spills a vreg not used by the current + instruction and makes free the corresponding rreg. */ +# define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase) \ + ({ \ + UInt _r_free_idx = -1; \ + Bool free_rreg_found = find_free_rreg( \ + vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state, \ + (_ii), (_reg_class), (_reserve_phase), con, &_r_free_idx); \ + if (!free_rreg_found) { \ + HReg vreg_to_spill = find_vreg_to_spill( \ + vreg_state, n_vregs, rreg_state, n_rregs, \ + ®_usage[(_ii)], (_reg_class), \ + reg_usage, (_ii) + 1, \ + instrs_in->arr_used - 1, con); \ + _r_free_idx = spill_vreg(vreg_to_spill, hregIndex(vreg_to_spill), \ + (_ii), v... [truncated message content] |