You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
|
|
|
1
|
2
(1) |
|
3
(3) |
4
(7) |
5
|
6
(1) |
7
(3) |
8
(4) |
9
(2) |
|
10
|
11
|
12
|
13
|
14
|
15
|
16
(1) |
|
17
(4) |
18
(2) |
19
(1) |
20
(1) |
21
(2) |
22
(4) |
23
(2) |
|
24
(1) |
25
|
26
(5) |
27
(2) |
28
(3) |
29
(1) |
30
|
|
From: <sv...@va...> - 2016-04-06 09:52:26
|
Author: sewardj
Date: Wed Apr 6 10:52:17 2016
New Revision: 15850
Log:
Improve performance of helperc_MAKE_STACK_UNINIT, especially for the
amd64-{linux,darwin} cases. n-i-bz.
Modified:
trunk/memcheck/mc_include.h
trunk/memcheck/mc_main.c
trunk/memcheck/mc_translate.c
Modified: trunk/memcheck/mc_include.h
==============================================================================
--- trunk/memcheck/mc_include.h (original)
+++ trunk/memcheck/mc_include.h Wed Apr 6 10:52:17 2016
@@ -316,6 +316,12 @@
MCPE_DIE_MEM_STACK_128,
MCPE_DIE_MEM_STACK_144,
MCPE_DIE_MEM_STACK_160,
+ MCPE_MAKE_STACK_UNINIT_W_O,
+ MCPE_MAKE_STACK_UNINIT_NO_O,
+ MCPE_MAKE_STACK_UNINIT_128_NO_O,
+ MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16,
+ MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8,
+ MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE,
/* Do not add enumerators past this line. */
MCPE_LAST
};
@@ -749,8 +755,14 @@
VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr );
VG_REGPARM(1) UWord MC_(helperc_LOADV8) ( Addr );
-void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
- Addr nia );
+VG_REGPARM(3)
+void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia );
+
+VG_REGPARM(2)
+void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len );
+
+VG_REGPARM(1)
+void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base );
/* Origin tag load/store helpers */
VG_REGPARM(2) void MC_(helperc_b_store1) ( Addr a, UWord d32 );
Modified: trunk/memcheck/mc_main.c
==============================================================================
--- trunk/memcheck/mc_main.c (original)
+++ trunk/memcheck/mc_main.c Wed Apr 6 10:52:17 2016
@@ -255,6 +255,9 @@
#define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
#define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
+// These represent 128 bits of memory.
+#define VA_BITS32_UNDEFINED 0x55555555 // 01_01_01_01b x 4
+
#define SM_CHUNKS 16384 // Each SM covers 64k of memory.
#define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
@@ -596,6 +599,12 @@
// In all these, 'low' means it's definitely in the main primary map,
// 'high' means it's definitely in the auxiliary table.
+static INLINE UWord get_primary_map_low_offset ( Addr a )
+{
+ UWord pm_off = a >> 16;
+ return pm_off;
+}
+
static INLINE SecMap** get_secmap_low_ptr ( Addr a )
{
UWord pm_off = a >> 16;
@@ -3515,31 +3524,22 @@
}
-/* Note that this serves both the origin-tracking and
- no-origin-tracking modes. We assume that calls to it are
- sufficiently infrequent that it isn't worth specialising for the
- with/without origin-tracking cases. */
-void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
+/* This marks the stack as addressable but undefined, after a call or
+ return for a target that has an ABI defined stack redzone. It
+ happens quite a lot and needs to be fast. This is the version for
+ origin tracking. The non-origin-tracking version is below. */
+VG_REGPARM(3)
+void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
{
- UInt otag;
- tl_assert(sizeof(UWord) == sizeof(SizeT));
+ PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
if (0)
- VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
+ VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
base, len, nia );
- if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
- UInt ecu = convert_nia_to_ecu ( nia );
- tl_assert(VG_(is_plausible_ECU)(ecu));
- otag = ecu | MC_OKIND_STACK;
- } else {
- tl_assert(nia == 0);
- otag = 0;
- }
+ UInt ecu = convert_nia_to_ecu ( nia );
+ tl_assert(VG_(is_plausible_ECU)(ecu));
-# if 0
- /* Really slow version */
- MC_(make_mem_undefined_w_otag)(base, len, otag);
-# endif
+ UInt otag = ecu | MC_OKIND_STACK;
# if 0
/* Slow(ish) version, which is fairly easily seen to be correct.
@@ -3577,21 +3577,20 @@
directly into the vabits array. (If the sm was distinguished, this
will make a copy and then write to it.)
*/
-
if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
/* Now we know the address range is suitably sized and aligned. */
UWord a_lo = (UWord)(base);
UWord a_hi = (UWord)(base + 128 - 1);
tl_assert(a_lo < a_hi); // paranoia: detect overflow
- if (a_hi <= MAX_PRIMARY_ADDRESS) {
- // Now we know the entire range is within the main primary map.
- SecMap* sm = get_secmap_for_writing_low(a_lo);
- SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
- /* Now we know that the entire address range falls within a
- single secondary map, and that that secondary 'lives' in
- the main primary map. */
- if (LIKELY(sm == sm_hi)) {
- // Finally, we know that the range is entirely within one secmap.
+ if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
+ /* Now we know the entire range is within the main primary map. */
+ UWord pm_off_lo = get_primary_map_low_offset(a_lo);
+ UWord pm_off_hi = get_primary_map_low_offset(a_hi);
+ if (LIKELY(pm_off_lo == pm_off_hi)) {
+ /* Now we know that the entire address range falls within a
+ single secondary map, and that that secondary 'lives' in
+ the main primary map. */
+ SecMap* sm = get_secmap_for_writing_low(a_lo);
UWord v_off = SM_OFF(a_lo);
UShort* p = (UShort*)(&sm->vabits8[v_off]);
p[ 0] = VA_BITS16_UNDEFINED;
@@ -3610,24 +3609,22 @@
p[13] = VA_BITS16_UNDEFINED;
p[14] = VA_BITS16_UNDEFINED;
p[15] = VA_BITS16_UNDEFINED;
- if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
- set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
- }
+ set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
return;
}
}
@@ -3640,14 +3637,13 @@
UWord a_hi = (UWord)(base + 288 - 1);
tl_assert(a_lo < a_hi); // paranoia: detect overflow
if (a_hi <= MAX_PRIMARY_ADDRESS) {
- // Now we know the entire range is within the main primary map.
- SecMap* sm = get_secmap_for_writing_low(a_lo);
- SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
- /* Now we know that the entire address range falls within a
- single secondary map, and that that secondary 'lives' in
- the main primary map. */
- if (LIKELY(sm == sm_hi)) {
- // Finally, we know that the range is entirely within one secmap.
+ UWord pm_off_lo = get_primary_map_low_offset(a_lo);
+ UWord pm_off_hi = get_primary_map_low_offset(a_hi);
+ if (LIKELY(pm_off_lo == pm_off_hi)) {
+ /* Now we know that the entire address range falls within a
+ single secondary map, and that that secondary 'lives' in
+ the main primary map. */
+ SecMap* sm = get_secmap_for_writing_low(a_lo);
UWord v_off = SM_OFF(a_lo);
UShort* p = (UShort*)(&sm->vabits8[v_off]);
p[ 0] = VA_BITS16_UNDEFINED;
@@ -3686,44 +3682,42 @@
p[33] = VA_BITS16_UNDEFINED;
p[34] = VA_BITS16_UNDEFINED;
p[35] = VA_BITS16_UNDEFINED;
- if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
- set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
- set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
- }
+ set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
+ set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
return;
}
}
@@ -3734,6 +3728,278 @@
}
+/* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
+ specialised for the non-origin-tracking case. */
+VG_REGPARM(2)
+void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
+{
+ PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
+ if (0)
+ VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
+ base, len );
+
+# if 0
+ /* Slow(ish) version, which is fairly easily seen to be correct.
+ */
+ if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
+ make_aligned_word64_undefined(base + 0);
+ make_aligned_word64_undefined(base + 8);
+ make_aligned_word64_undefined(base + 16);
+ make_aligned_word64_undefined(base + 24);
+
+ make_aligned_word64_undefined(base + 32);
+ make_aligned_word64_undefined(base + 40);
+ make_aligned_word64_undefined(base + 48);
+ make_aligned_word64_undefined(base + 56);
+
+ make_aligned_word64_undefined(base + 64);
+ make_aligned_word64_undefined(base + 72);
+ make_aligned_word64_undefined(base + 80);
+ make_aligned_word64_undefined(base + 88);
+
+ make_aligned_word64_undefined(base + 96);
+ make_aligned_word64_undefined(base + 104);
+ make_aligned_word64_undefined(base + 112);
+ make_aligned_word64_undefined(base + 120);
+ } else {
+ make_mem_undefined(base, len);
+ }
+# endif
+
+ /* Idea is: go fast when
+ * 8-aligned and length is 128
+ * the sm is available in the main primary map
+ * the address range falls entirely within a single secondary map
+ If all those conditions hold, just update the V+A bits by writing
+ directly into the vabits array. (If the sm was distinguished, this
+ will make a copy and then write to it.)
+ */
+ if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
+ /* Now we know the address range is suitably sized and aligned. */
+ UWord a_lo = (UWord)(base);
+ UWord a_hi = (UWord)(base + 128 - 1);
+ tl_assert(a_lo < a_hi); // paranoia: detect overflow
+ if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
+ /* Now we know the entire range is within the main primary map. */
+ UWord pm_off_lo = get_primary_map_low_offset(a_lo);
+ UWord pm_off_hi = get_primary_map_low_offset(a_hi);
+ if (LIKELY(pm_off_lo == pm_off_hi)) {
+ /* Now we know that the entire address range falls within a
+ single secondary map, and that that secondary 'lives' in
+ the main primary map. */
+ SecMap* sm = get_secmap_for_writing_low(a_lo);
+ UWord v_off = SM_OFF(a_lo);
+ UShort* p = (UShort*)(&sm->vabits8[v_off]);
+ p[ 0] = VA_BITS16_UNDEFINED;
+ p[ 1] = VA_BITS16_UNDEFINED;
+ p[ 2] = VA_BITS16_UNDEFINED;
+ p[ 3] = VA_BITS16_UNDEFINED;
+ p[ 4] = VA_BITS16_UNDEFINED;
+ p[ 5] = VA_BITS16_UNDEFINED;
+ p[ 6] = VA_BITS16_UNDEFINED;
+ p[ 7] = VA_BITS16_UNDEFINED;
+ p[ 8] = VA_BITS16_UNDEFINED;
+ p[ 9] = VA_BITS16_UNDEFINED;
+ p[10] = VA_BITS16_UNDEFINED;
+ p[11] = VA_BITS16_UNDEFINED;
+ p[12] = VA_BITS16_UNDEFINED;
+ p[13] = VA_BITS16_UNDEFINED;
+ p[14] = VA_BITS16_UNDEFINED;
+ p[15] = VA_BITS16_UNDEFINED;
+ return;
+ }
+ }
+ }
+
+ /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
+ if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
+ /* Now we know the address range is suitably sized and aligned. */
+ UWord a_lo = (UWord)(base);
+ UWord a_hi = (UWord)(base + 288 - 1);
+ tl_assert(a_lo < a_hi); // paranoia: detect overflow
+ if (a_hi <= MAX_PRIMARY_ADDRESS) {
+ UWord pm_off_lo = get_primary_map_low_offset(a_lo);
+ UWord pm_off_hi = get_primary_map_low_offset(a_hi);
+ if (LIKELY(pm_off_lo == pm_off_hi)) {
+ /* Now we know that the entire address range falls within a
+ single secondary map, and that that secondary 'lives' in
+ the main primary map. */
+ SecMap* sm = get_secmap_for_writing_low(a_lo);
+ UWord v_off = SM_OFF(a_lo);
+ UShort* p = (UShort*)(&sm->vabits8[v_off]);
+ p[ 0] = VA_BITS16_UNDEFINED;
+ p[ 1] = VA_BITS16_UNDEFINED;
+ p[ 2] = VA_BITS16_UNDEFINED;
+ p[ 3] = VA_BITS16_UNDEFINED;
+ p[ 4] = VA_BITS16_UNDEFINED;
+ p[ 5] = VA_BITS16_UNDEFINED;
+ p[ 6] = VA_BITS16_UNDEFINED;
+ p[ 7] = VA_BITS16_UNDEFINED;
+ p[ 8] = VA_BITS16_UNDEFINED;
+ p[ 9] = VA_BITS16_UNDEFINED;
+ p[10] = VA_BITS16_UNDEFINED;
+ p[11] = VA_BITS16_UNDEFINED;
+ p[12] = VA_BITS16_UNDEFINED;
+ p[13] = VA_BITS16_UNDEFINED;
+ p[14] = VA_BITS16_UNDEFINED;
+ p[15] = VA_BITS16_UNDEFINED;
+ p[16] = VA_BITS16_UNDEFINED;
+ p[17] = VA_BITS16_UNDEFINED;
+ p[18] = VA_BITS16_UNDEFINED;
+ p[19] = VA_BITS16_UNDEFINED;
+ p[20] = VA_BITS16_UNDEFINED;
+ p[21] = VA_BITS16_UNDEFINED;
+ p[22] = VA_BITS16_UNDEFINED;
+ p[23] = VA_BITS16_UNDEFINED;
+ p[24] = VA_BITS16_UNDEFINED;
+ p[25] = VA_BITS16_UNDEFINED;
+ p[26] = VA_BITS16_UNDEFINED;
+ p[27] = VA_BITS16_UNDEFINED;
+ p[28] = VA_BITS16_UNDEFINED;
+ p[29] = VA_BITS16_UNDEFINED;
+ p[30] = VA_BITS16_UNDEFINED;
+ p[31] = VA_BITS16_UNDEFINED;
+ p[32] = VA_BITS16_UNDEFINED;
+ p[33] = VA_BITS16_UNDEFINED;
+ p[34] = VA_BITS16_UNDEFINED;
+ p[35] = VA_BITS16_UNDEFINED;
+ return;
+ }
+ }
+ }
+
+ /* else fall into slow case */
+ make_mem_undefined(base, len);
+}
+
+
+/* And this is an even more specialised case, for the case where there
+ is no origin tracking, and the length is 128. */
+VG_REGPARM(1)
+void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
+{
+ PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
+ if (0)
+ VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );
+
+# if 0
+ /* Slow(ish) version, which is fairly easily seen to be correct.
+ */
+ if (LIKELY( VG_IS_8_ALIGNED(base) )) {
+ make_aligned_word64_undefined(base + 0);
+ make_aligned_word64_undefined(base + 8);
+ make_aligned_word64_undefined(base + 16);
+ make_aligned_word64_undefined(base + 24);
+
+ make_aligned_word64_undefined(base + 32);
+ make_aligned_word64_undefined(base + 40);
+ make_aligned_word64_undefined(base + 48);
+ make_aligned_word64_undefined(base + 56);
+
+ make_aligned_word64_undefined(base + 64);
+ make_aligned_word64_undefined(base + 72);
+ make_aligned_word64_undefined(base + 80);
+ make_aligned_word64_undefined(base + 88);
+
+ make_aligned_word64_undefined(base + 96);
+ make_aligned_word64_undefined(base + 104);
+ make_aligned_word64_undefined(base + 112);
+ make_aligned_word64_undefined(base + 120);
+ } else {
+ make_mem_undefined(base, 128);
+ }
+# endif
+
+ /* Idea is: go fast when
+ * 16-aligned and length is 128
+ * the sm is available in the main primary map
+ * the address range falls entirely within a single secondary map
+ If all those conditions hold, just update the V+A bits by writing
+ directly into the vabits array. (If the sm was distinguished, this
+ will make a copy and then write to it.)
+
+ Typically this applies to amd64 'ret' instructions, since RSP is
+ 16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
+ */
+ if (LIKELY( VG_IS_16_ALIGNED(base) )) {
+ /* Now we know the address range is suitably sized and aligned. */
+ UWord a_lo = (UWord)(base);
+ UWord a_hi = (UWord)(base + 128 - 1);
+ /* FIXME: come up with a sane story on the wraparound case
+ (which of course cannot happen, but still..) */
+ /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
+ if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
+ /* Now we know the entire range is within the main primary map. */
+ UWord pm_off_lo = get_primary_map_low_offset(a_lo);
+ UWord pm_off_hi = get_primary_map_low_offset(a_hi);
+ if (LIKELY(pm_off_lo == pm_off_hi)) {
+ /* Now we know that the entire address range falls within a
+ single secondary map, and that that secondary 'lives' in
+ the main primary map. */
+ PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
+ SecMap* sm = get_secmap_for_writing_low(a_lo);
+ UWord v_off = SM_OFF(a_lo);
+ UInt* w32 = (UInt*)(&sm->vabits8[v_off]);
+ w32[ 0] = VA_BITS32_UNDEFINED;
+ w32[ 1] = VA_BITS32_UNDEFINED;
+ w32[ 2] = VA_BITS32_UNDEFINED;
+ w32[ 3] = VA_BITS32_UNDEFINED;
+ w32[ 4] = VA_BITS32_UNDEFINED;
+ w32[ 5] = VA_BITS32_UNDEFINED;
+ w32[ 6] = VA_BITS32_UNDEFINED;
+ w32[ 7] = VA_BITS32_UNDEFINED;
+ return;
+ }
+ }
+ }
+
+ /* The same, but for when base is 8 % 16, which is the situation
+ with RSP for amd64-ELF immediately after call instructions.
+ */
+ if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
+ /* Now we know the address range is suitably sized and aligned. */
+ UWord a_lo = (UWord)(base);
+ UWord a_hi = (UWord)(base + 128 - 1);
+ /* FIXME: come up with a sane story on the wraparound case
+ (which of course cannot happen, but still..) */
+ /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
+ if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
+ /* Now we know the entire range is within the main primary map. */
+ UWord pm_off_lo = get_primary_map_low_offset(a_lo);
+ UWord pm_off_hi = get_primary_map_low_offset(a_hi);
+ if (LIKELY(pm_off_lo == pm_off_hi)) {
+ PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
+ /* Now we know that the entire address range falls within a
+ single secondary map, and that that secondary 'lives' in
+ the main primary map. */
+ SecMap* sm = get_secmap_for_writing_low(a_lo);
+ UWord v_off = SM_OFF(a_lo);
+ UShort* w16 = (UShort*)(&sm->vabits8[v_off]);
+ UInt* w32 = (UInt*)(&w16[1]);
+ /* The following assertion is commented out for obvious
+ performance reasons, but was verified as valid when
+ running the entire testsuite and also Firefox. */
+ /* tl_assert(VG_IS_4_ALIGNED(w32)); */
+ w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
+ w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
+ w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
+ w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
+ w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
+ w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
+ w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
+ w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
+ w16[15] = VA_BITS16_UNDEFINED; // w16[15]
+ return;
+ }
+ }
+ }
+
+ /* else fall into slow case */
+ PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
+ make_mem_undefined(base, 128);
+}
+
+
/*------------------------------------------------------------*/
/*--- Checking memory ---*/
/*------------------------------------------------------------*/
@@ -6991,6 +7257,15 @@
[MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
[MCPE_NEW_MEM_STACK] = "new_mem_stack",
[MCPE_DIE_MEM_STACK] = "die_mem_stack",
+ [MCPE_MAKE_STACK_UNINIT_W_O] = "MAKE_STACK_UNINIT_w_o",
+ [MCPE_MAKE_STACK_UNINIT_NO_O] = "MAKE_STACK_UNINIT_no_o",
+ [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
+ [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
+ = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
+ [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
+ = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
+ [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
+ = "MAKE_STACK_UNINIT_128_no_o_slowcase",
};
static void init_prof_mem ( void )
@@ -7869,6 +8144,8 @@
# endif
}
+STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));
+
VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
/*--------------------------------------------------------------------*/
Modified: trunk/memcheck/mc_translate.c
==============================================================================
--- trunk/memcheck/mc_translate.c (original)
+++ trunk/memcheck/mc_translate.c Wed Apr 6 10:52:17 2016
@@ -5512,20 +5512,36 @@
void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
{
IRDirty* di;
- /* Minor optimisation: if not doing origin tracking, ignore the
- supplied nia and pass zero instead. This is on the basis that
- MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
- almost always generate a shorter instruction to put zero into a
- register than any other value. */
- if (MC_(clo_mc_level) < 3)
- nia = mkIRExpr_HWord(0);
-
- di = unsafeIRDirty_0_N(
- 0/*regparms*/,
- "MC_(helperc_MAKE_STACK_UNINIT)",
- VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
- mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
- );
+
+ if (MC_(clo_mc_level) == 3) {
+ di = unsafeIRDirty_0_N(
+ 3/*regparms*/,
+ "MC_(helperc_MAKE_STACK_UNINIT_w_o)",
+ VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_w_o) ),
+ mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
+ );
+ } else {
+ /* We ignore the supplied nia, since it is irrelevant. */
+ tl_assert(MC_(clo_mc_level) == 2 || MC_(clo_mc_level) == 1);
+ /* Special-case the len==128 case, since that is for amd64-ELF,
+ which is a very common target. */
+ if (len == 128) {
+ di = unsafeIRDirty_0_N(
+ 1/*regparms*/,
+ "MC_(helperc_MAKE_STACK_UNINIT_128_no_o)",
+ VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_128_no_o)),
+ mkIRExprVec_1( base )
+ );
+ } else {
+ di = unsafeIRDirty_0_N(
+ 2/*regparms*/,
+ "MC_(helperc_MAKE_STACK_UNINIT_no_o)",
+ VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_no_o) ),
+ mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) )
+ );
+ }
+ }
+
stmt( 'V', mce, IRStmt_Dirty(di) );
}
|