You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
|
|
|
|
1
(11) |
|
2
(5) |
3
(11) |
4
(13) |
5
(1) |
6
(15) |
7
(1) |
8
(1) |
|
9
(2) |
10
(4) |
11
(15) |
12
(2) |
13
(12) |
14
(2) |
15
(3) |
|
16
(1) |
17
(16) |
18
(1) |
19
(32) |
20
(19) |
21
(3) |
22
|
|
23
|
24
(4) |
25
|
26
(1) |
27
(19) |
28
(4) |
29
(2) |
|
30
(3) |
|
|
|
|
|
|
|
From: Jeremy F. <je...@go...> - 2003-11-27 16:59:39
|
On Thu, 2003-11-27 at 04:02, Nicholas Nethercote wrote:
> 2. Are REP prefixes widely used? Because our current implementation of
> them is pretty sucky. In particular, fetching the D-flag via a C call
> every time around the loop must be hurting us badly (the D-flag can never
> change in the middle of a REP-loop -- that requires a CLD/STD, right?)
> I could quite easily pull the C call out the front so it's only done once
> per REP.
I was doing some profiling with oprofile the other day, and the D flag helper was one of the top 5 functions in valgrind.so when running cc1 (sorry, no concrete numbers on hand at the moment). So that would be good.
> This would cut code, which is good. But there'll be lots of small
> changes, which would be bad for anyone who has fiddled with vg_to_ucode.c
> in their workspace. Would committing this annoy anyone? I can hold off
> if so.
Fine by me.
J
|
|
From: Nicholas N. <nj...@ca...> - 2003-11-27 16:15:58
|
CVS commit by nethercote:
Fixed up REP-prefix handling majorly. Factored out *lots* of repetitive code,
so much so that the file is now 280 lines shorter. This despite me also adding
support for LOOP{E,NE} (thanks to Abhijit Menon-Sen). Also added support for
CMPS[lw], which was missing. Adding more REP-prefix instructions in the future
will now be much easier.
As part of this, I moved the D-flag fetch outside of the REP loops. This might
make programs that use REP prefixes a lot go faster.
M +139 -415 vg_to_ucode.c 1.112
--- valgrind/coregrind/vg_to_ucode.c #1.111:1.112
@@ -1807,182 +1807,55 @@ Addr dis_Grp5 ( UCodeBlock* cb,
}
-
-/* Template for REPE CMPS<sz>. Assumes this insn is the last one in
- the basic block, and so emits a jump to the next insn. */
-static
-void codegen_REPE_CMPS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
+static __inline__
+void dis_JMP_d32( UCodeBlock* cb, Addr d32 )
{
- Int tc, /* ECX */
- td, /* EDI */ ts, /* ESI */
- tdv, /* (EDI) */ tsv /* (ESI) */;
-
- tdv = newTemp(cb);
- tsv = newTemp(cb);
- td = newTemp(cb);
- ts = newTemp(cb);
- tc = newTemp(cb);
-
- uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, tc);
- uInstr2(cb, JIFZ, 4, TempReg, tc, Literal, 0);
- uLiteral(cb, eip_next);
- uInstr1(cb, DEC, 4, TempReg, tc);
- uInstr2(cb, PUT, 4, TempReg, tc, ArchReg, R_ECX);
-
- uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td);
- uInstr2(cb, GET, 4, ArchReg, R_ESI, TempReg, ts);
-
- uInstr2(cb, LOAD, sz, TempReg, td, TempReg, tdv);
- uInstr2(cb, LOAD, sz, TempReg, ts, TempReg, tsv);
-
- uInstr2(cb, SUB, sz, TempReg, tdv, TempReg, tsv);
- setFlagsFromUOpcode(cb, SUB);
-
- uInstr0(cb, CALLM_S, 0);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, tdv);
- uLiteral(cb, 0);
- uInstr1(cb, PUSH, 4, TempReg, tdv);
-
- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
- uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
-
- uInstr1(cb, POP, 4, TempReg, tdv);
- uInstr0(cb, CALLM_E, 0);
- if (sz == 4 || sz == 2) {
- uInstr2(cb, SHL, 4, Literal, 0, TempReg, tdv);
- uLiteral(cb, sz/2);
- }
- uInstr2(cb, ADD, 4, TempReg, tdv, TempReg, td);
- uInstr2(cb, ADD, 4, TempReg, tdv, TempReg, ts);
-
- uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI);
- uInstr2(cb, PUT, 4, TempReg, ts, ArchReg, R_ESI);
-
- uInstr1(cb, JMP, 0, Literal, 0);
- uLiteral(cb, eip);
- uCond(cb, CondZ);
- uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
uInstr1(cb, JMP, 0, Literal, 0);
- uLiteral(cb, eip_next);
+ uLiteral(cb, d32);
uCond(cb, CondAlways);
}
+/*------------------------------------------------------------*/
+/*--- Disassembling string ops (including REP prefixes) ---*/
+/*------------------------------------------------------------*/
-/* Template for REPNE SCAS<sz>. Assumes this insn is the last one in
- the basic block, and so emits a jump to the next insn. */
+/* Code shared by all the string ops */
static
-void codegen_REPNE_SCAS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
+void dis_string_op_increment(UCodeBlock* cb, Int sz, Int t_inc)
{
- Int ta /* EAX */, tc /* ECX */, td /* EDI */, tv;
- ta = newTemp(cb);
- tc = newTemp(cb);
- tv = newTemp(cb);
- td = newTemp(cb);
-
- uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, tc);
- uInstr2(cb, JIFZ, 4, TempReg, tc, Literal, 0);
- uLiteral(cb, eip_next);
- uInstr1(cb, DEC, 4, TempReg, tc);
- uInstr2(cb, PUT, 4, TempReg, tc, ArchReg, R_ECX);
-
- uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta);
- uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td);
- uInstr2(cb, LOAD, sz, TempReg, td, TempReg, tv);
- /* next uinstr kills ta, but that's ok -- don't need it again */
- uInstr2(cb, SUB, sz, TempReg, tv, TempReg, ta);
- setFlagsFromUOpcode(cb, SUB);
-
- uInstr0(cb, CALLM_S, 0);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, tv);
+ uInstr0 (cb, CALLM_S, 0);
+ uInstr2 (cb, MOV, 4, Literal, 0, TempReg, t_inc);
uLiteral(cb, 0);
- uInstr1(cb, PUSH, 4, TempReg, tv);
+ uInstr1 (cb, PUSH, 4, TempReg, t_inc);
- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
+ uInstr1 (cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
- uInstr1(cb, POP, 4, TempReg, tv);
+ uInstr1(cb, POP, 4, TempReg, t_inc);
uInstr0(cb, CALLM_E, 0);
if (sz == 4 || sz == 2) {
- uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv);
+ uInstr2(cb, SHL, 4, Literal, 0, TempReg, t_inc);
uLiteral(cb, sz/2);
}
- uInstr2(cb, ADD, 4, TempReg, tv, TempReg, td);
- uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI);
- uInstr1(cb, JMP, 0, Literal, 0);
- uLiteral(cb, eip);
- uCond(cb, CondNZ);
- uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
- uInstr1(cb, JMP, 0, Literal, 0);
- uLiteral(cb, eip_next);
- uCond(cb, CondAlways);
}
-/* Template for REPE SCAS<sz>. Assumes this insn is the last one in
- the basic block, and so emits a jump to the next insn. */
static
-void codegen_REPE_SCAS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
+void dis_string_op( UCodeBlock* cb, void (*dis_OP)( UCodeBlock*, Int, Int ),
+ Int sz, Char* name, UChar sorb )
{
- Int ta /* EAX */, tc /* ECX */, td /* EDI */, tv;
- ta = newTemp(cb);
- tc = newTemp(cb);
- tv = newTemp(cb);
- td = newTemp(cb);
-
- uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, tc);
- uInstr2(cb, JIFZ, 4, TempReg, tc, Literal, 0);
- uLiteral(cb, eip_next);
- uInstr1(cb, DEC, 4, TempReg, tc);
- uInstr2(cb, PUT, 4, TempReg, tc, ArchReg, R_ECX);
-
- uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta);
- uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td);
- uInstr2(cb, LOAD, sz, TempReg, td, TempReg, tv);
- /* next uinstr kills ta, but that's ok -- don't need it again */
- uInstr2(cb, SUB, sz, TempReg, tv, TempReg, ta);
- setFlagsFromUOpcode(cb, SUB);
-
- uInstr0(cb, CALLM_S, 0);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, tv);
- uLiteral(cb, 0);
- uInstr1(cb, PUSH, 4, TempReg, tv);
-
- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
- uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
-
- uInstr1(cb, POP, 4, TempReg, tv);
- uInstr0(cb, CALLM_E, 0);
-
- if (sz == 4 || sz == 2) {
- uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv);
- uLiteral(cb, sz/2);
- }
- uInstr2(cb, ADD, 4, TempReg, tv, TempReg, td);
- uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI);
- uInstr1(cb, JMP, 0, Literal, 0);
- uLiteral(cb, eip);
- uCond(cb, CondZ);
- uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
- uInstr1(cb, JMP, 0, Literal, 0);
- uLiteral(cb, eip_next);
- uCond(cb, CondAlways);
+ Int t_inc = newTemp(cb);
+ vg_assert(sorb == 0);
+ dis_string_op_increment(cb, sz, t_inc);
+ dis_OP( cb, sz, t_inc );
+ if (dis) VG_(printf)("%s%c\n", name, nameISize(sz));
}
-/* Template for REPE MOVS<sz>. Assumes this insn is the last one in
- the basic block, and so emits a jump to the next insn. */
static
-void codegen_REPE_MOVS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
+void dis_MOVS ( UCodeBlock* cb, Int sz, Int t_inc )
{
- Int ts /* ESI */, tc /* ECX */, td /* EDI */, tv;
- tc = newTemp(cb);
- td = newTemp(cb);
- ts = newTemp(cb);
- tv = newTemp(cb);
-
- uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, tc);
- uInstr2(cb, JIFZ, 4, TempReg, tc, Literal, 0);
- uLiteral(cb, eip_next);
- uInstr1(cb, DEC, 4, TempReg, tc);
- uInstr2(cb, PUT, 4, TempReg, tc, ArchReg, R_ECX);
+ Int tv = newTemp(cb); /* value being copied */
+ Int td = newTemp(cb); /* EDI */
+ Int ts = newTemp(cb); /* ESI */
uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td);
@@ -1990,48 +1863,32 @@ void codegen_REPE_MOVS ( UCodeBlock* cb,
uInstr2(cb, LOAD, sz, TempReg, ts, TempReg, tv);
- uInstr2(cb, STORE, sz, TempReg, tv, TempReg, td);
+ uInstr2(cb, STORE,sz, TempReg, tv, TempReg, td);
- uInstr0(cb, CALLM_S, 0);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, tv);
- uLiteral(cb, 0);
- uInstr1(cb, PUSH, 4, TempReg, tv);
+ uInstr2(cb, ADD, 4, TempReg, t_inc, TempReg, td);
+ uInstr2(cb, ADD, 4, TempReg, t_inc, TempReg, ts);
- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
- uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
+ uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI);
+ uInstr2(cb, PUT, 4, TempReg, ts, ArchReg, R_ESI);
+}
- uInstr1(cb, POP, 4, TempReg, tv);
- uInstr0(cb, CALLM_E, 0);
+static
+void dis_LODS ( UCodeBlock* cb, Int sz, Int t_inc )
+{
+ Int ta = newTemp(cb); /* EAX */
+ Int ts = newTemp(cb); /* ESI */
- if (sz == 4 || sz == 2) {
- uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv);
- uLiteral(cb, sz/2);
- }
- uInstr2(cb, ADD, 4, TempReg, tv, TempReg, td);
- uInstr2(cb, ADD, 4, TempReg, tv, TempReg, ts);
+ uInstr2(cb, GET, 4, ArchReg, R_ESI, TempReg, ts);
+ uInstr2(cb, LOAD, sz, TempReg, ts, TempReg, ta);
+ uInstr2(cb, PUT, sz, TempReg, ta, ArchReg, R_EAX);
- uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI);
+ uInstr2(cb, ADD, 4, TempReg, t_inc, TempReg, ts);
uInstr2(cb, PUT, 4, TempReg, ts, ArchReg, R_ESI);
-
- uInstr1(cb, JMP, 0, Literal, 0);
- uLiteral(cb, eip);
- uCond(cb, CondAlways);
}
-
-/* Template for REPE STOS<sz>. Assumes this insn is the last one in
- the basic block, and so emits a jump to the next insn. */
static
-void codegen_REPE_STOS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
+void dis_STOS ( UCodeBlock* cb, Int sz, Int t_inc )
{
- Int ta /* EAX */, tc /* ECX */, td /* EDI */;
- ta = newTemp(cb);
- tc = newTemp(cb);
- td = newTemp(cb);
-
- uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, tc);
- uInstr2(cb, JIFZ, 4, TempReg, tc, Literal, 0);
- uLiteral(cb, eip_next);
- uInstr1(cb, DEC, 4, TempReg, tc);
- uInstr2(cb, PUT, 4, TempReg, tc, ArchReg, R_ECX);
+ Int ta = newTemp(cb); /* EAX */
+ Int td = newTemp(cb); /* EDI */
uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta);
@@ -2039,38 +1896,15 @@ void codegen_REPE_STOS ( UCodeBlock* cb,
uInstr2(cb, STORE, sz, TempReg, ta, TempReg, td);
- uInstr0(cb, CALLM_S, 0);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, ta);
- uLiteral(cb, 0);
- uInstr1(cb, PUSH, 4, TempReg, ta);
-
- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
- uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
-
- uInstr1(cb, POP, 4, TempReg, ta);
- uInstr0(cb, CALLM_E, 0);
-
- if (sz == 4 || sz == 2) {
- uInstr2(cb, SHL, 4, Literal, 0, TempReg, ta);
- uLiteral(cb, sz/2);
- }
- uInstr2(cb, ADD, 4, TempReg, ta, TempReg, td);
+ uInstr2(cb, ADD, 4, TempReg, t_inc, TempReg, td);
uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI);
-
- uInstr1(cb, JMP, 0, Literal, 0);
- uLiteral(cb, eip);
- uCond(cb, CondAlways);
}
-
-/* Template for CMPS<sz>, _not_ preceded by a REP prefix. */
static
-void codegen_CMPS ( UCodeBlock* cb, Int sz )
+void dis_CMPS ( UCodeBlock* cb, Int sz, Int t_inc )
{
- Int td, /* EDI */ ts, /* ESI */
- tdv, /* (EDI) */ tsv /* (ESI) */;
- tdv = newTemp(cb);
- tsv = newTemp(cb);
- td = newTemp(cb);
- ts = newTemp(cb);
+ Int tdv = newTemp(cb); /* (EDI) */
+ Int tsv = newTemp(cb); /* (ESI) */
+ Int td = newTemp(cb); /* EDI */
+ Int ts = newTemp(cb); /* ESI */
uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td);
@@ -2083,60 +1917,6 @@ void codegen_CMPS ( UCodeBlock* cb, Int
setFlagsFromUOpcode(cb, SUB);
- uInstr0(cb, CALLM_S, 0);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, tdv);
- uLiteral(cb, 0);
- uInstr1(cb, PUSH, 4, TempReg, tdv);
-
- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
- uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
-
- uInstr1(cb, POP, 4, TempReg, tdv);
- uInstr0(cb, CALLM_E, 0);
-
- if (sz == 4 || sz == 2) {
- uInstr2(cb, SHL, 4, Literal, 0, TempReg, tdv);
- uLiteral(cb, sz/2);
- }
- uInstr2(cb, ADD, 4, TempReg, tdv, TempReg, td);
- uInstr2(cb, ADD, 4, TempReg, tdv, TempReg, ts);
-
- uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI);
- uInstr2(cb, PUT, 4, TempReg, ts, ArchReg, R_ESI);
-}
-
-
-/* Template for MOVS<sz>, _not_ preceded by a REP prefix. */
-static
-void codegen_MOVS ( UCodeBlock* cb, Int sz )
-{
- Int tv, /* the value being copied */
- td, /* EDI */ ts /* ESI */;
- tv = newTemp(cb);
- td = newTemp(cb);
- ts = newTemp(cb);
-
- uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td);
- uInstr2(cb, GET, 4, ArchReg, R_ESI, TempReg, ts);
-
- uInstr2(cb, LOAD, sz, TempReg, ts, TempReg, tv);
- uInstr2(cb, STORE, sz, TempReg, tv, TempReg, td);
-
- uInstr0(cb, CALLM_S, 0);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, tv);
- uLiteral(cb, 0);
- uInstr1(cb, PUSH, 4, TempReg, tv);
-
- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
- uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
-
- uInstr1(cb, POP, 4, TempReg, tv);
- uInstr0(cb, CALLM_E, 0);
-
- if (sz == 4 || sz == 2) {
- uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv);
- uLiteral(cb, sz/2);
- }
- uInstr2(cb, ADD, 4, TempReg, tv, TempReg, td);
- uInstr2(cb, ADD, 4, TempReg, tv, TempReg, ts);
+ uInstr2(cb, ADD, 4, TempReg, t_inc, TempReg, td);
+ uInstr2(cb, ADD, 4, TempReg, t_inc, TempReg, ts);
uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI);
@@ -2144,104 +1924,60 @@ void codegen_MOVS ( UCodeBlock* cb, Int
}
-
-/* Template for STOS<sz>, _not_ preceded by a REP prefix. */
static
-void codegen_STOS ( UCodeBlock* cb, Int sz )
+void dis_SCAS ( UCodeBlock* cb, Int sz, Int t_inc )
{
- Int ta /* EAX */, td /* EDI */;
- ta = newTemp(cb);
- td = newTemp(cb);
+ Int ta = newTemp(cb); /* EAX */
+ Int td = newTemp(cb); /* EDI */
+ Int tdv = newTemp(cb); /* (EDI) */
uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta);
uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td);
- uInstr2(cb, STORE, sz, TempReg, ta, TempReg, td);
-
- uInstr0(cb, CALLM_S, 0);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, ta);
- uLiteral(cb, 0);
- uInstr1(cb, PUSH, 4, TempReg, ta);
-
- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
- uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
-
- uInstr1(cb, POP, 4, TempReg, ta);
- uInstr0(cb, CALLM_E, 0);
+ uInstr2(cb, LOAD, sz, TempReg, td, TempReg, tdv);
+ /* next uinstr kills ta, but that's ok -- don't need it again */
+ uInstr2(cb, SUB, sz, TempReg, tdv, TempReg, ta);
+ setFlagsFromUOpcode(cb, SUB);
- if (sz == 4 || sz == 2) {
- uInstr2(cb, SHL, 4, Literal, 0, TempReg, ta);
- uLiteral(cb, sz/2);
- }
- uInstr2(cb, ADD, 4, TempReg, ta, TempReg, td);
+ uInstr2(cb, ADD, 4, TempReg, t_inc, TempReg, td);
uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI);
}
-/* Template for LODS<sz>, _not_ preceded by a REP prefix. */
-static
-void codegen_LODS ( UCodeBlock* cb, Int sz )
-{
- Int ta /* EAX */, ts /* ESI */;
- ta = newTemp(cb);
- ts = newTemp(cb);
-
- uInstr2(cb, GET, 4, ArchReg, R_ESI, TempReg, ts);
- uInstr2(cb, LOAD, sz, TempReg, ts, TempReg, ta);
- uInstr2(cb, PUT, sz, TempReg, ta, ArchReg, R_EAX);
-
- uInstr0(cb, CALLM_S, 0);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, ta);
- uLiteral(cb, 0);
- uInstr1(cb, PUSH, 4, TempReg, ta);
-
- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
- uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
-
- uInstr1(cb, POP, 4, TempReg, ta);
- uInstr0(cb, CALLM_E, 0);
-
- if (sz == 4 || sz == 2) {
- uInstr2(cb, SHL, 4, Literal, 0, TempReg, ta);
- uLiteral(cb, sz/2);
- }
- uInstr2(cb, ADD, 4, TempReg, ta, TempReg, ts);
- uInstr2(cb, PUT, 4, TempReg, ts, ArchReg, R_ESI);
-}
-
-
-/* Template for SCAS<sz>, _not_ preceded by a REP prefix. */
+/* Wrap the appropriate string op inside a REP/REPE/REPNE.
+ We assume the insn is the last one in the basic block, and so emit a jump
+ to the next insn, rather than just falling through. */
static
-void codegen_SCAS ( UCodeBlock* cb, Int sz )
+void dis_REP_op ( UCodeBlock* cb, Int cond,
+ void (*dis_OP)(UCodeBlock*, Int, Int),
+ Int sz, Addr eip, Addr eip_next, Char* name )
{
- Int ta /* EAX */, td /* EDI */, tv;
- ta = newTemp(cb);
- tv = newTemp(cb);
- td = newTemp(cb);
+ Int t_inc = newTemp(cb);
+ Int tc = newTemp(cb); /* ECX */
- uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta);
- uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td);
- uInstr2(cb, LOAD, sz, TempReg, td, TempReg, tv);
- /* next uinstr kills ta, but that's ok -- don't need it again */
- uInstr2(cb, SUB, sz, TempReg, tv, TempReg, ta);
- setFlagsFromUOpcode(cb, SUB);
+ dis_string_op_increment(cb, sz, t_inc);
- uInstr0(cb, CALLM_S, 0);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, tv);
- uLiteral(cb, 0);
- uInstr1(cb, PUSH, 4, TempReg, tv);
+ uInstr2 (cb, GET, 4, ArchReg, R_ECX, TempReg, tc);
+ uInstr2 (cb, JIFZ, 4, TempReg, tc, Literal, 0);
+ uLiteral(cb, eip_next);
+ uInstr1 (cb, DEC, 4, TempReg, tc);
+ uInstr2 (cb, PUT, 4, TempReg, tc, ArchReg, R_ECX);
- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
- uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
+ dis_OP (cb, sz, t_inc);
- uInstr1(cb, POP, 4, TempReg, tv);
- uInstr0(cb, CALLM_E, 0);
+ if (cond == CondAlways) {
+ dis_JMP_d32 (cb, eip);
+ } else {
+ uInstr1 (cb, JMP, 0, Literal, 0);
+ uLiteral (cb, eip);
+ uCond (cb, cond);
+ uFlagsRWU (cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
- if (sz == 4 || sz == 2) {
- uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv);
- uLiteral(cb, sz/2);
+ dis_JMP_d32 (cb, eip_next);
}
- uInstr2(cb, ADD, 4, TempReg, tv, TempReg, td);
- uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI);
+ if (dis) VG_(printf)("%s%c\n", name, nameISize(sz));
}
+/*------------------------------------------------------------*/
+/*--- Arithmetic, etc. ---*/
+/*------------------------------------------------------------*/
/* (I)MUL E, G. Supplied eip points to the modR/M byte. */
@@ -5111,4 +4847,6 @@ static Addr disInstr ( UCodeBlock* cb, A
break;
+ case 0xE0: /* LOOPNE disp8 */
+ case 0xE1: /* LOOPE disp8 */
case 0xE2: /* LOOP disp8 */
/* Again, the docs say this uses ECX/CX as a count depending on
@@ -5123,7 +4861,19 @@ static Addr disInstr ( UCodeBlock* cb, A
uInstr2(cb, JIFZ, 4, TempReg, t1, Literal, 0);
uLiteral(cb, eip);
+
+ if (opc == 0xE2) { /* LOOP */
uInstr1(cb, JMP, 0, Literal, 0);
uLiteral(cb, d32);
uCond(cb, CondAlways);
+ } else { /* LOOPE/LOOPNE */
+ uInstr1(cb, JMP, 0, Literal, 0);
+ uLiteral(cb, eip);
+ uCond(cb, (opc == 0xE1 ? CondNZ : CondZ));
+ uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
+
+ uInstr1(cb, JMP, 0, Literal, 0);
+ uLiteral(cb, d32);
+ uCond(cb, CondAlways);
+ }
*isEnd = True;
if (dis)
@@ -5700,54 +5450,29 @@ static Addr disInstr ( UCodeBlock* cb, A
/* ------------------------ SCAS et al ----------------- */
- case 0xA4: /* MOVSb, no REP prefix */
- vg_assert(sorb == 0);
- codegen_MOVS ( cb, 1 );
- if (dis) VG_(printf)("movsb\n");
- break;
- case 0xA5: /* MOVSv, no REP prefix */
- vg_assert(sorb == 0);
- codegen_MOVS ( cb, sz );
- if (dis) VG_(printf)("movs%c\n", nameISize(sz));
+ case 0xA4: /* MOVS, no REP prefix */
+ case 0xA5:
+ dis_string_op( cb, dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
break;
case 0xA6: /* CMPSb, no REP prefix */
- vg_assert(sorb == 0);
- codegen_CMPS ( cb, 1 );
- if (dis) VG_(printf)("cmpsb\n");
+ case 0xA7:
+ dis_string_op( cb, dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
break;
- case 0xAA: /* STOSb, no REP prefix */
- vg_assert(sorb == 0);
- codegen_STOS ( cb, 1 );
- if (dis) VG_(printf)("stosb\n");
- break;
- case 0xAB: /* STOSv, no REP prefix */
- vg_assert(sorb == 0);
- codegen_STOS ( cb, sz );
- if (dis) VG_(printf)("stos%c\n", nameISize(sz));
+ case 0xAA: /* STOS, no REP prefix */
+ case 0xAB:
+ dis_string_op( cb, dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb );
break;
- case 0xAC: /* LODSb, no REP prefix */
- vg_assert(sorb == 0);
- codegen_LODS ( cb, 1 );
- if (dis) VG_(printf)("lodsb\n");
- break;
- case 0xAD: /* LODSv, no REP prefix */
- vg_assert(sorb == 0);
- codegen_LODS ( cb, sz );
- if (dis) VG_(printf)("lods%c\n", nameISize(sz));
+ case 0xAC: /* LODS, no REP prefix */
+ case 0xAD:
+ dis_string_op( cb, dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb );
break;
- case 0xAE: /* SCASb, no REP prefix */
- vg_assert(sorb == 0);
- codegen_SCAS ( cb, 1 );
- if (dis) VG_(printf)("scasb\n");
+ case 0xAE: /* SCAS, no REP prefix */
+ case 0xAF:
+ dis_string_op( cb, dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
break;
- case 0xAF: /* SCASl, no REP prefix */
- vg_assert(sorb == 0);
- codegen_SCAS ( cb, sz );
- if (dis) VG_(printf)("scas;\n");
- break;
case 0xFC: /* CLD */
@@ -5791,7 +5516,6 @@ static Addr disInstr ( UCodeBlock* cb, A
if (abyte == 0xAE || abyte == 0xAF) { /* REPNE SCAS<sz> */
if (abyte == 0xAE) sz = 1;
- codegen_REPNE_SCAS ( cb, sz, eip_orig, eip );
+ dis_REP_op ( cb, CondNZ, dis_SCAS, sz, eip_orig, eip, "repne scas" );
*isEnd = True;
- if (dis) VG_(printf)("repne scas%c\n", nameISize(sz));
}
else {
@@ -5801,5 +5525,7 @@ static Addr disInstr ( UCodeBlock* cb, A
}
- case 0xF3: { /* REPE prefix insn */
+ /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE,
+ for the rest, it means REP) */
+ case 0xF3: {
Addr eip_orig = eip - 1;
vg_assert(sorb == 0);
@@ -5807,36 +5534,33 @@ static Addr disInstr ( UCodeBlock* cb, A
if (abyte == 0x66) { sz = 2; abyte = getUChar(eip); eip++; }
- if (abyte == 0xA4 || abyte == 0xA5) { /* REPE MOV<sz> */
+ if (abyte == 0xA4 || abyte == 0xA5) { /* REP MOV<sz> */
if (abyte == 0xA4) sz = 1;
- codegen_REPE_MOVS ( cb, sz, eip_orig, eip );
+ dis_REP_op ( cb, CondAlways, dis_MOVS, sz, eip_orig, eip, "rep movs" );
*isEnd = True;
- if (dis) VG_(printf)("repe mov%c\n", nameISize(sz));
}
else
if (abyte == 0xA6 || abyte == 0xA7) { /* REPE CMP<sz> */
if (abyte == 0xA6) sz = 1;
- codegen_REPE_CMPS ( cb, sz, eip_orig, eip );
+ dis_REP_op ( cb, CondZ, dis_CMPS, sz, eip_orig, eip, "repe cmps" );
*isEnd = True;
- if (dis) VG_(printf)("repe cmps%c\n", nameISize(sz));
}
else
- if (abyte == 0xAA || abyte == 0xAB) { /* REPE STOS<sz> */
+ if (abyte == 0xAA || abyte == 0xAB) { /* REP STOS<sz> */
if (abyte == 0xAA) sz = 1;
- codegen_REPE_STOS ( cb, sz, eip_orig, eip );
+ dis_REP_op ( cb, CondAlways, dis_STOS, sz, eip_orig, eip, "rep stos" );
*isEnd = True;
- if (dis) VG_(printf)("repe stos%c\n", nameISize(sz));
}
else
if (abyte == 0xAE || abyte == 0xAF) { /* REPE SCAS<sz> */
if (abyte == 0xAE) sz = 1;
- codegen_REPE_SCAS ( cb, sz, eip_orig, eip );
+ dis_REP_op ( cb, CondZ, dis_SCAS, sz, eip_orig, eip, "repe scas" );
*isEnd = True;
- if (dis) VG_(printf)("repe scas%c\n", nameISize(sz));
}
else
- if (abyte == 0x90) { /* REPE NOP (PAUSE) */
- if (dis) VG_(printf)("repe nop (P4 pause)\n");
+ if (abyte == 0x90) { /* REP NOP (PAUSE) */
+ if (dis) VG_(printf)("rep nop (P4 pause)\n");
/* do nothing; apparently a hint to the P4 re spin-wait loop */
- } else {
+ }
+ else {
goto decode_failure;
}
|
|
From: Nicholas N. <nj...@ca...> - 2003-11-27 13:33:53
|
On Thu, 27 Nov 2003, Dirk Mueller wrote:
> > Ok, so far, one vote in favour of speeding these up, one against. I'll
> > probably do it, because it will be pretty easy.
>
> I was not voting against speeding it up. I was voting against spending much
> time on which particular REPxx instruction works with LODS etc.
Oh, sorry. There's no problem with the prefixes and LODS, they're correct as is. As for frequency of REP prefixes, there seem to be plenty in different programs on my system -- even in valgrind.so (compiled by GCC 3.2.2), so I will definitely do it.
N
|
|
From: Dirk M. <dm...@gm...> - 2003-11-27 12:55:51
|
On Thursday 27 November 2003 13:52, Nicholas Nethercote wrote:
> Ok, so far, one vote in favour of speeding these up, one against. I'll
> probably do it, because it will be pretty easy.
I was not voting against speeding it up. I was voting against spending much time on which particular REPxx instruction works with LODS etc.
|
|
From: Nicholas N. <nj...@ca...> - 2003-11-27 12:52:33
|
On Thu, 27 Nov 2003, Dirk Mueller wrote:
> > 1. What is the point of "rep lods"? AFAICT, it loads multiple words from
> > the address pointed to by %esi into %eax.
>
> I think you can quickly scan for a 0 byte/word/dword this way.
No, that's what REP{E,NE} SCAS is for. LODS can only be prefixed with
REP, so the LODS must be done N times, where N is in %ecx.
> Afaik no compiler generates those sequences anymore, so you'll only hit in
> very old applications that contain 5 year old assembler sequences. As long as
> nobody complains, don't bother.
Ok, so far, one vote in favour of speeding these up, one against. I'll
probably do it, because it will be pretty easy.
N
|
|
From: Dirk M. <dm...@gm...> - 2003-11-27 12:26:41
|
On Thursday 27 November 2003 13:02, Nicholas Nethercote wrote:
> 1. What is the point of "rep lods"? AFAICT, it loads multiple words from
> the address pointed to by %esi into %eax.
I think you can quickly scan for a 0 byte/word/dword this way. The REP instructions became quite unpopular lately as doing it the "normal" ways is meanwhile faster with recent CPUs rather than using the REPxx stuff. Afaik no compiler generates those sequences anymore, so you'll only hit in very old applications that contain 5 year old assembler sequences. As long as nobody complains, don't bother.
|
|
From: Tom H. <th...@cy...> - 2003-11-27 12:22:07
|
In message <Pin...@gr...>
Nicholas Nethercote <nj...@ca...> wrote:
> 2. Are REP prefixes widely used? Because our current implementation of
> them is pretty sucky. In particular, fetching the D-flag via a C call
> every time around the loop must be hurting us badly (the D-flag can never
> change in the middle of a REP-loop -- that requires a CLD/STD, right?)
> I could quite easily pull the C call out the front so it's only done once
> per REP.
Well REPZ and REPNZ are used quite a lot for inlining various string
operations. A quick scan of one of our programs shows 1424 REPZ/REPNZ
prefixes in a 10Mb executable.
Tom
--
Tom Hughes (th...@cy...)
Software Engineer, Cyberscience Corporation
http://www.cyberscience.com/
|
|
From: Nicholas N. <nj...@ca...> - 2003-11-27 12:17:48
|
On Thu, 27 Nov 2003, Nicholas Nethercote wrote:
> I also clarified the difference between REP and REPE a bit
> (briefly, the same code is used for REP and REPE, but REPE only applies to
> scas & cmps, whereas REP applies to lods, stos, ins, outs and movs).
This is unclear: by "the same code" I mean the same byte (0xF3) is used in the machine code, ie. 0xF3 means REP or REPE, depending on the following instruction.
N
|
|
From: Nicholas N. <nj...@ca...> - 2003-11-27 12:02:41
|
Hi,
I've given REP prefix handling in vg_to_ucode.c a thorough overhaul. There was a whole lot of code duplication in there, eg. the codegen_REPE_SCAS() contained all the code in codegen_SCAS(), (ditto for all the other ones), and various other stupidities. All up I cut about 220 lines of code, and generalised it so that adding missing REP prefixed instructions in the future will be a whole lot easier. I also clarified the difference between REP and REPE a bit (briefly, the same code is used for REP and REPE, but REPE only applies to scas & cmps, whereas REP applies to lods, stos, ins, outs and movs).
I have a couple of questions about this before I commit it in the HEAD, though.
1. What is the point of "rep lods"? AFAICT, it loads multiple words from the address pointed to by %esi into %eax. I assume Intel just let you REP-prefix it for consistency with the other string ops? Valgrind doesn't handle it currently, I don't imagine anyone ever used it.
2. Are REP prefixes widely used? Because our current implementation of them is pretty sucky. In particular, fetching the D-flag via a C call every time around the loop must be hurting us badly (the D-flag can never change in the middle of a REP-loop -- that requires a CLD/STD, right?) I could quite easily pull the C call out the front so it's only done once per REP.
3. I've seen some other opportunities for code factoring in vg_to_ucode.c. In particular there are loads of jumps like this:
uInstr1(cb, JMP, 0, Literal, 0);
uLiteral(cb, d32);
uCond(cb, CondAlways);
for which a function could be factored out. This would cut code, which is good. But there'll be lots of small changes, which would be bad for anyone who has fiddled with vg_to_ucode.c in their workspace. Would committing this annoy anyone? I can hold off if so.
N
|
|
From: Dirk M. <mu...@kd...> - 2003-11-27 11:45:19
|
CVS commit by mueller:
CVS_SILENT ignore
M +2 -0 .cvsignore 1.3
--- valgrind/helgrind/tests/.cvsignore #1.2:1.3
@@ -11,2 +11,4 @@
race2
readshared
+inherit.stderr.diff
+inherit.stderr.out
|
|
From: Dirk M. <mu...@kd...> - 2003-11-27 11:44:22
|
CVS commit by mueller:
remove reference to --assume-2.4 option
M +0 -5 coregrind_core.html 1.19
--- valgrind/coregrind/docs/coregrind_core.html #1.18:1.19
@@ -866,9 +866,4 @@
</li><br><p>
- <li><code>--assume-2.4=no</code> [default]<br>
- <code>--assume-2.4=yes</code><br>
- <p>Assume we're running on a 2.4 kernel. Only needed for some
- distributions that are broken in obscure ways.
- </li><br><p>
</ul>
|
|
From: Nicholas N. <nj...@ca...> - 2003-11-27 09:36:22
|
On Thu, 27 Nov 2003, Dirk Mueller wrote: > CVS commit by mueller: > > reverting last commit, which broke all of valgrind. Once you guys have worked this out (the current HEAD looks ok on my RH9/2.4.19 box, BTW), can one of you please remove references to the --assume-2.4 option from the docs? Thanks. N |
|
From: Nicholas N. <nj...@ca...> - 2003-11-27 09:30:58
|
CVS commit by nethercote: Fix comment M +1 -1 vg_to_ucode.c 1.111 --- valgrind/coregrind/vg_to_ucode.c #1.110:1.111 @@ -2209,5 +2209,5 @@ void codegen_LODS ( UCodeBlock* cb, Int -/* Template for REPNE SCAS<sz>, _not_ preceded by a REP prefix. */ +/* Template for SCAS<sz>, _not_ preceded by a REP prefix. */ static void codegen_SCAS ( UCodeBlock* cb, Int sz ) |
|
From: Nicholas N. <nj...@ca...> - 2003-11-27 09:29:09
|
CVS commit by nethercote:
Added some missing instructions, from HEAD:
SCAS{l,w}
REPE SCAS
CVTPD2PS
MOV imm,eSP
M +75 -3 vg_to_ucode.c 1.87.2.11
--- valgrind/coregrind/vg_to_ucode.c #1.87.2.10:1.87.2.11
@@ -1916,4 +1916,54 @@ void codegen_REPNE_SCAS ( UCodeBlock* cb
}
+/* Template for REPE SCAS<sz>. Assumes this insn is the last one in
+ the basic block, and so emits a jump to the next insn. */
+static
+void codegen_REPE_SCAS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next )
+{
+ Int ta /* EAX */, tc /* ECX */, td /* EDI */, tv;
+ ta = newTemp(cb);
+ tc = newTemp(cb);
+ tv = newTemp(cb);
+ td = newTemp(cb);
+
+ uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, tc);
+ uInstr2(cb, JIFZ, 4, TempReg, tc, Literal, 0);
+ uLiteral(cb, eip_next);
+ uInstr1(cb, DEC, 4, TempReg, tc);
+ uInstr2(cb, PUT, 4, TempReg, tc, ArchReg, R_ECX);
+
+ uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta);
+ uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td);
+ uInstr2(cb, LOAD, sz, TempReg, td, TempReg, tv);
+ /* next uinstr kills ta, but that's ok -- don't need it again */
+ uInstr2(cb, SUB, sz, TempReg, tv, TempReg, ta);
+ setFlagsFromUOpcode(cb, SUB);
+
+ uInstr0(cb, CALLM_S, 0);
+ uInstr2(cb, MOV, 4, Literal, 0, TempReg, tv);
+ uLiteral(cb, 0);
+ uInstr1(cb, PUSH, 4, TempReg, tv);
+
+ uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag));
+ uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty);
+
+ uInstr1(cb, POP, 4, TempReg, tv);
+ uInstr0(cb, CALLM_E, 0);
+
+ if (sz == 4 || sz == 2) {
+ uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv);
+ uLiteral(cb, sz/2);
+ }
+ uInstr2(cb, ADD, 4, TempReg, tv, TempReg, td);
+ uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI);
+ uInstr1(cb, JMP, 0, Literal, 0);
+ uLiteral(cb, eip);
+ uCond(cb, CondZ);
+ uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
+ uInstr1(cb, JMP, 0, Literal, 0);
+ uLiteral(cb, eip_next);
+ uCond(cb, CondAlways);
+}
+
/* Template for REPE MOVS<sz>. Assumes this insn is the last one in
@@ -2157,5 +2207,5 @@ void codegen_LODS ( UCodeBlock* cb, Int
-/* Template for REPNE SCAS<sz>, _not_ preceded by a REP prefix. */
+/* Template for SCAS<sz>, _not_ preceded by a REP prefix. */
static
void codegen_SCAS ( UCodeBlock* cb, Int sz )
@@ -4743,4 +4793,12 @@ static Addr disInstr ( UCodeBlock* cb, A
}
+ /* CVTPD2PS -- convert two doubles to two floats. */
+ if (sz == 2 &&
+ insn[0] == 0x0F && insn[1] == 0x5A) {
+ eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16, "cvtpd2ps",
+ 0x66, insn[0], insn[1] );
+ goto decode_success;
+ }
+
/* SQRTPD: square root of packed double. */
if (sz == 2
@@ -5202,4 +5260,5 @@ static Addr disInstr ( UCodeBlock* cb, A
case 0xBA: /* MOV imm,eDX */
case 0xBB: /* MOV imm,eBX */
+ case 0xBC: /* MOV imm,eSP */
case 0xBD: /* MOV imm,eBP */
case 0xBE: /* MOV imm,eSI */
@@ -5711,4 +5770,10 @@ static Addr disInstr ( UCodeBlock* cb, A
break;
+ case 0xAF: /* SCASl, no REP prefix */
+ vg_assert(sorb == 0);
+ codegen_SCAS ( cb, sz );
+ if (dis) VG_(printf)("scas;\n");
+ break;
+
case 0xFC: /* CLD */
uInstr0(cb, CALLM_S, 0);
@@ -5786,4 +5851,11 @@ static Addr disInstr ( UCodeBlock* cb, A
*isEnd = True;
if (dis) VG_(printf)("repe stos%c\n", nameISize(sz));
+ }
+ else
+ if (abyte == 0xAE || abyte == 0xAF) { /* REPE SCAS<sz> */
+ if (abyte == 0xAE) sz = 1;
+ codegen_REPE_SCAS ( cb, sz, eip_orig, eip );
+ *isEnd = True;
+ if (dis) VG_(printf)("repe scas%c\n", nameISize(sz));
}
else
|
|
From: Nicholas N. <nj...@ca...> - 2003-11-27 09:19:49
|
On Thu, 27 Nov 2003, Dirk Mueller wrote: > > Is a solaris version of valgrind ever likely ? > > Its on the todo,but if nobody is helping with it, its likely not going to > happen. Note that this is on the very-speculative-and-would-be-a-lot-of-work part of the todo list :) A Solaris port has been requested almost as frequently as a Windows port. But a port to a non-open-source OS would be difficult, because Valgrind does lots of low-level OS stuff. A BSD port (FreeBSD is easily the most asked-for) would be a much easier first OS port. Also, I suspect we wouldn't get full value out of a Solaris port without also doing a SPARC architecture port. N |
|
From: Dirk M. <dm...@gm...> - 2003-11-27 09:09:47
|
On Wednesday 26 November 2003 12:01, dave tilley wrote: > Is a solaris version of valgrind ever likely ? It's on the todo, but if nobody is helping with it, it's likely not going to happen. |
|
From: Dirk M. <mu...@kd...> - 2003-11-27 09:08:21
|
CVS commit by mueller:
reverting last commit, which broke all of valgrind.
M +4 -3 vg_proxylwp.c 1.8
--- valgrind/coregrind/vg_proxylwp.c #1.7:1.8
@@ -895,6 +895,4 @@ static Int proxy_clone(ProxyLWP *proxy)
Int ret = -1;
- proxy->lwp = -1;
-
if (have_settid != 0) {
ret = VG_(clone)(proxylwp,
@@ -921,9 +919,11 @@ static Int proxy_clone(ProxyLWP *proxy)
}
}
+ else
+ have_settid = 1;
}
if (ret < 0) {
vg_assert(have_settid == 0);
- vg_assert(proxy->lwp == -1);
+ vg_assert(proxy->lwp == 0);
ret = VG_(clone)(proxylwp,
|
|
From: Jeremy F. <je...@go...> - 2003-11-27 08:11:49
|
CVS commit by fitzhardinge:
Fix up the have_settid test so it works on both plain 2.4 and 2.6 kernels.
I think this will also work on SuSE kernels.
M +26 -12 vg_proxylwp.c 1.7
--- valgrind/coregrind/vg_proxylwp.c #1.6:1.7
@@ -893,5 +893,7 @@ static Int have_settid = -1; /* -1 -> un
static Int proxy_clone(ProxyLWP *proxy)
{
- Int ret;
+ Int ret = -1;
+
+ proxy->lwp = -1;
if (have_settid != 0) {
@@ -899,18 +901,30 @@ static Int proxy_clone(ProxyLWP *proxy)
LWP_stack(proxy),
VKI_CLONE_FS | VKI_CLONE_FILES | VKI_CLONE_VM |
- VKI_CLONE_SIGHAND | VKI_CLONE_THREAD /*|
- VKI_CLONE_PARENT_SETTID
- VKI_CLONE_CHILD_CLEARTID | VKI_CLONE_DETACHED*/,
+ VKI_CLONE_SIGHAND | VKI_CLONE_THREAD |
+ VKI_CLONE_PARENT_SETTID |
+ VKI_CLONE_CHILD_CLEARTID | VKI_CLONE_DETACHED,
proxy, &proxy->lwp, &proxy->lwp);
- if ( have_settid < 0 && !proxy->lwp ) {
+
+ if ( have_settid == -1 && (ret < 0 || proxy->lwp == 0) ) {
have_settid = 0;
+
+ /* Assume that not having parent_settid also means that we've
+ got 2.4-style signal handling, which means we need to do
+ more work. */
+ VG_(do_signal_routing) = True;
+
+ if (ret > 0) {
+ /* If clone actually succeeded and just ignored the
+ CLONE_PARENT_SETTID flag, then use the LWP it created
+ for us. */
proxy->lwp = ret;
- VG_(do_signal_routing) = True; /* XXX True, it seems kernels
- which have futex also have
- sensible signal handling, but
- it would be nice to test it
- directly. */
}
- } else {
+ }
+ }
+
+ if (ret < 0) {
+ vg_assert(have_settid == 0);
+ vg_assert(proxy->lwp == -1);
+
ret = VG_(clone)(proxylwp,
LWP_stack(proxy),
|
|
From: Dirk M. <mu...@kd...> - 2003-11-27 02:17:16
|
CVS commit by mueller:
test for PARENT_SETTID support in clone() of the kernel instead
of testing for presence of NPTL by assuming that sys_futex is only
implemented when it's an NPTL-patched kernel.
M +0 -2 vg_include.h 1.157
M +0 -9 vg_main.c 1.126
M +20 -21 vg_proxylwp.c 1.6
--- valgrind/coregrind/vg_include.h #1.156:1.157
@@ -244,6 +244,4 @@ extern Char* VG_(clo_weird_hacks);
signals. */
extern Int VG_(clo_signal_polltime);
-/* Assume we're running on a plain 2.4 kernel */
-extern Bool VG_(clo_assume_24);
/* Low latency syscalls and signals */
--- valgrind/coregrind/vg_main.c #1.125:1.126
@@ -570,7 +570,4 @@ static Bool VG_(clo_wait_for_gdb) =
Int VG_(clo_signal_polltime) = 50;
-/* If true, assume we're running on a plain 2.4 kernel */
-Bool VG_(clo_assume_24) = False;
-
/* These flags reduce thread wakeup latency on syscall completion and
signal delivery, respectively. The downside is possible unfairness. */
@@ -678,5 +675,4 @@ static void usage ( void )
" --lowlat-syscalls=no|yes improve wake-up latency when a thread's\n"
" syscall completes [no]\n"
-" --assume-2.4=no|yes assume we're running on a 2.4 kernel [no]\n"
"\n"
" %s tool user options:\n";
@@ -1126,9 +1122,4 @@ static void process_cmd_line_options ( v
else if (VG_CLO_STREQ(argv[i], "--lowlat-syscalls=no"))
VG_(clo_lowlat_syscalls) = False;
-
- else if (VG_CLO_STREQ(argv[i], "--assume-2.4=yes"))
- VG_(clo_assume_24) = True;
- else if (VG_CLO_STREQ(argv[i], "--assume-2.4=no"))
- VG_(clo_assume_24) = False;
else if (VG_CLO_STREQN(13, argv[i], "--stop-after="))
--- valgrind/coregrind/vg_proxylwp.c #1.5:1.6
@@ -883,5 +883,5 @@ static Int do_futex(void *addr, Int op,
#define VKI_FUTEX_REQUEUE 3
-static Int have_futex = -1; /* -1 -> unknown */
+static Int have_settid = -1; /* -1 -> unknown */
/*
@@ -895,19 +895,15 @@ static Int proxy_clone(ProxyLWP *proxy)
Int ret;
- if (VG_(clo_assume_24))
- have_futex = 0;
-
- if (have_futex == -1)
- have_futex = do_futex(NULL, VKI_FUTEX_WAKE, 0, NULL, NULL) != -VKI_ENOSYS;
-
- if (have_futex) {
+ if (have_settid != 0) {
ret = VG_(clone)(proxylwp,
LWP_stack(proxy),
VKI_CLONE_FS | VKI_CLONE_FILES | VKI_CLONE_VM |
- VKI_CLONE_SIGHAND | VKI_CLONE_THREAD |
- VKI_CLONE_PARENT_SETTID |
- VKI_CLONE_CHILD_CLEARTID | VKI_CLONE_DETACHED,
+ VKI_CLONE_SIGHAND | VKI_CLONE_THREAD /*|
+ VKI_CLONE_PARENT_SETTID
+ VKI_CLONE_CHILD_CLEARTID | VKI_CLONE_DETACHED*/,
proxy, &proxy->lwp, &proxy->lwp);
- } else {
+ if ( have_settid < 0 && !proxy->lwp ) {
+ have_settid = 0;
+ proxy->lwp = ret;
VG_(do_signal_routing) = True; /* XXX True, it seems kernels
which have futex also have
@@ -915,5 +911,6 @@ static Int proxy_clone(ProxyLWP *proxy)
it would be nice to test it
directly. */
-
+ }
+ } else {
ret = VG_(clone)(proxylwp,
LWP_stack(proxy),
@@ -932,12 +929,12 @@ static Bool proxy_wait(ProxyLWP *proxy,
Bool ret = False;
- if (have_futex == -1)
+ if (have_settid == -1)
return False;
- if (have_futex) {
+ if (have_settid) {
if (block) {
Int lwp = proxy->lwp;
- while(proxy->lwp != 0)
+ if(proxy->lwp != 0)
do_futex(&proxy->lwp, VKI_FUTEX_WAIT, lwp, NULL, NULL);
@@ -985,4 +982,6 @@ void VG_(proxy_create)(ThreadId tid)
proxy->tid = tid;
proxy->tst = tst;
+ proxy->exitcode = 0;
+ proxy->lwp = 0;
proxy->siginfo.si_signo = 0;
proxy->frommain = VG_(safe_fd)(p[0]);
@@ -1313,5 +1312,5 @@ void VG_(proxy_sanity)(void)
ThreadState *tst = &VG_(threads)[tid];
ProxyLWP *px;
- Int status;
+ Int status = 0;
Int ret;
|