You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
1
(1) |
2
(10) |
3
(10) |
4
(14) |
5
(10) |
|
6
|
7
(1) |
8
(14) |
9
(10) |
10
(4) |
11
(2) |
12
(4) |
|
13
|
14
(2) |
15
(11) |
16
(10) |
17
|
18
|
19
(18) |
|
20
(14) |
21
(10) |
22
(1) |
23
(11) |
24
(14) |
25
(10) |
26
(6) |
|
27
(13) |
28
(13) |
29
(12) |
30
(13) |
|
|
|
|
From: <sv...@va...> - 2014-04-22 22:07:15
|
Author: philippe
Date: Tue Apr 22 22:07:01 2014
New Revision: 13904
Log:
* Implement optimisation in storev8/16/64 similar to optimisations in storev32
See comments in MC_(helperc_STOREV8) for a detailed explanation.
On modern CPUs, this gives a few percent speed improvement for
some perf tests, and reduces the number of undistinguished secondary
maps.
Modified:
trunk/memcheck/mc_main.c
Modified: trunk/memcheck/mc_main.c
==============================================================================
--- trunk/memcheck/mc_main.c (original)
+++ trunk/memcheck/mc_main.c Tue Apr 22 22:07:01 2014
@@ -607,7 +607,7 @@
return &am->sm;
}
-static SecMap** get_secmap_ptr ( Addr a )
+static INLINE SecMap** get_secmap_ptr ( Addr a )
{
return ( a <= MAX_PRIMARY_ADDRESS
? get_secmap_low_ptr(a)
@@ -659,7 +659,7 @@
writable copy of it, install it, and return the copy instead. (COW
semantics).
*/
-static SecMap* get_secmap_for_writing ( Addr a )
+static INLINE SecMap* get_secmap_for_writing ( Addr a )
{
return ( a <= MAX_PRIMARY_ADDRESS
? get_secmap_for_writing_low (a)
@@ -1235,7 +1235,7 @@
ULong pessim64 = V_BITS64_DEFINED;
UWord long_index = byte_offset_w(szL, bigendian, j);
for (i = 8-1; i >= 0; i--) {
- PROF_EVENT(31, "mc_LOADV_128_or_256_slow(loop)");
+ PROF_EVENT(29, "mc_LOADV_128_or_256_slow(loop)");
ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
ok = get_vbits8(ai, &vbits8);
vbits64 <<= 8;
@@ -1472,8 +1472,9 @@
cases on 64-bit platforms. Are merely a speedup hack; can be
omitted without loss of correctness/functionality. Note that in
both cases the "sizeof(void*) == 8" causes these cases to be
- folded out by compilers on 32-bit platforms. These are derived
- from STOREV64 and STOREV32.
+ folded out by compilers on 32-bit platforms. The logic below
+ is somewhat similar to some cases extensively commented in
+ MC_(helperc_STOREV8).
*/
if (LIKELY(sizeof(void*) == 8
&& nBits == 64 && VG_IS_8_ALIGNED(a))) {
@@ -1796,7 +1797,7 @@
void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
{
- PROF_EVENT(41, "MC_(make_mem_undefined)");
+ PROF_EVENT(43, "MC_(make_mem_undefined)");
DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
if (UNLIKELY( MC_(clo_mc_level) == 3 ))
@@ -4428,27 +4429,35 @@
sm_off16 = SM_OFF_16(a);
vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
- if (LIKELY( !is_distinguished_sm(sm) &&
- (VA_BITS16_DEFINED == vabits16 ||
- VA_BITS16_UNDEFINED == vabits16) ))
- {
- /* Handle common case quickly: a is suitably aligned, */
- /* is mapped, and is addressible. */
- // Convert full V-bits in register to compact 2-bit form.
- if (V_BITS64_DEFINED == vbits64) {
+ // To understand the below cleverness, see the extensive comments
+ // in MC_(helperc_STOREV8).
+ if (LIKELY(V_BITS64_DEFINED == vbits64)) {
+ if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
+ return;
+ }
+ if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
- } else if (V_BITS64_UNDEFINED == vbits64) {
- ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
- } else {
- /* Slow but general case -- writing partially defined bytes. */
- PROF_EVENT(212, "mc_STOREV64-slow2");
- mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
+ return;
}
- } else {
- /* Slow but general case. */
- PROF_EVENT(213, "mc_STOREV64-slow3");
+ PROF_EVENT(232, "mc_STOREV64-slow2");
mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
+ return;
}
+ if (V_BITS64_UNDEFINED == vbits64) {
+ if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
+ return;
+ }
+ if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
+ ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
+ return;
+ }
+ PROF_EVENT(232, "mc_STOREV64-slow3");
+ mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
+ return;
+ }
+
+ PROF_EVENT(212, "mc_STOREV64-slow4");
+ mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
}
#endif
}
@@ -4536,35 +4545,35 @@
sm_off = SM_OFF(a);
vabits8 = sm->vabits8[sm_off];
- // Cleverness: sometimes we don't have to write the shadow memory at
- // all, if we can tell that what we want to write is the same as what is
- // already there. The 64/16/8 bit cases also have cleverness at this
- // point, but it works a little differently to the code below.
- if (V_BITS32_DEFINED == vbits32) {
- if (vabits8 == (UInt)VA_BITS8_DEFINED) {
+ // To understand the below cleverness, see the extensive comments
+ // in MC_(helperc_STOREV8).
+ if (LIKELY(V_BITS32_DEFINED == vbits32)) {
+ if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
return;
- } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
+ }
+ if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
- } else {
- // not defined/undefined, or distinguished and changing state
- PROF_EVENT(232, "mc_STOREV32-slow2");
- mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
+ return;
}
- } else if (V_BITS32_UNDEFINED == vbits32) {
+ PROF_EVENT(232, "mc_STOREV32-slow2");
+ mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
+ return;
+ }
+ if (V_BITS32_UNDEFINED == vbits32) {
if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
return;
- } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
+ }
+ if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
- } else {
- // not defined/undefined, or distinguished and changing state
- PROF_EVENT(233, "mc_STOREV32-slow3");
- mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
+ return;
}
- } else {
- // Partially defined word
- PROF_EVENT(234, "mc_STOREV32-slow4");
+ PROF_EVENT(233, "mc_STOREV32-slow3");
mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
+ return;
}
+
+ PROF_EVENT(234, "mc_STOREV32-slow4");
+ mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
}
#endif
}
@@ -4631,6 +4640,18 @@
return mc_LOADV16(a, False);
}
+/* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
+static INLINE
+Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
+{
+ UInt shift;
+ tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
+ shift = (a & 2) << 1; // shift by 0 or 4
+ vabits8 >>= shift; // shift the four bits to the bottom
+ // check 2 x vabits2 != VA_BITS2_NOACCESS
+ return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
+ && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
+}
static INLINE
void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
@@ -4653,29 +4674,39 @@
sm = get_secmap_for_reading_low(a);
sm_off = SM_OFF(a);
vabits8 = sm->vabits8[sm_off];
- if (LIKELY( !is_distinguished_sm(sm) &&
- (VA_BITS8_DEFINED == vabits8 ||
- VA_BITS8_UNDEFINED == vabits8) ))
- {
- /* Handle common case quickly: a is suitably aligned, */
- /* is mapped, and is addressible. */
- // Convert full V-bits in register to compact 2-bit form.
- if (V_BITS16_DEFINED == vbits16) {
- insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED ,
+
+ // To understand the below cleverness, see the extensive comments
+ // in MC_(helperc_STOREV8).
+ if (LIKELY(V_BITS16_DEFINED == vbits16)) {
+ if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
+ return;
+ }
+ if (!is_distinguished_sm(sm)
+ && accessible_vabits4_in_vabits8(a, vabits8)) {
+ insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
&(sm->vabits8[sm_off]) );
- } else if (V_BITS16_UNDEFINED == vbits16) {
+ return;
+ }
+ PROF_EVENT(232, "mc_STOREV16-slow2");
+ mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
+ }
+ if (V_BITS16_UNDEFINED == vbits16) {
+ if (vabits8 == VA_BITS8_UNDEFINED) {
+ return;
+ }
+ if (!is_distinguished_sm(sm)
+ && accessible_vabits4_in_vabits8(a, vabits8)) {
insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
&(sm->vabits8[sm_off]) );
- } else {
- /* Slow but general case -- writing partially defined bytes. */
- PROF_EVENT(252, "mc_STOREV16-slow2");
- mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
+ return;
}
- } else {
- /* Slow but general case. */
- PROF_EVENT(253, "mc_STOREV16-slow3");
+ PROF_EVENT(233, "mc_STOREV16-slow3");
mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
+ return;
}
+
+ PROF_EVENT(234, "mc_STOREV16-slow4");
+ mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
}
#endif
}
@@ -4756,33 +4787,78 @@
sm = get_secmap_for_reading_low(a);
sm_off = SM_OFF(a);
vabits8 = sm->vabits8[sm_off];
- if (LIKELY
- ( !is_distinguished_sm(sm) &&
- ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8)
- || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8))
- )
- )
- )
- {
- /* Handle common case quickly: a is mapped, the entire word32 it
- lives in is addressible. */
- // Convert full V-bits in register to compact 2-bit form.
- if (V_BITS8_DEFINED == vbits8) {
+
+ // Clevernesses to speed up storing V bits.
+ // The 64/32/16 bit cases also have similar clevernesses, but it
+ // works a little differently to the code below.
+ //
+ // Cleverness 1: sometimes we don't have to write the shadow memory at
+ // all, if we can tell that what we want to write is the same as what is
+ // already there. These cases are marked below as "defined on defined" and
+ // "undefined on undefined".
+ //
+ // Cleverness 2:
+ // We also avoid to call mc_STOREVn_slow if the V bits can directly
+ // be written in the secondary map. V bits can be directly written
+ // if 4 conditions are respected:
+ // * The address for which V bits are written is naturally aligned
+ // on 1 byte for STOREV8 (this is always true)
+ // on 2 bytes for STOREV16
+ // on 4 bytes for STOREV32
+ // on 8 bytes for STOREV64.
+ // * V bits being written are either fully defined or fully undefined.
+ // (for partially defined V bits, V bits cannot be directly written,
+ // as the secondary vbits table must be maintained).
+ // * the secmap is not distinguished (distinguished maps cannot be
+ // modified).
+ // * the memory corresponding to the V bits being written is
+ // accessible (if one or more bytes are not accessible,
+ // we must call mc_STOREVn_slow in order to report accessibility
+ // errors).
+ // Note that for STOREV32 and STOREV64, it is too expensive
+ // to verify the accessibility of each byte for the benefit it
+ // brings. Instead, a quicker check is done by comparing to
+ // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
+ // but misses some opportunity of direct modifications.
+ // Checking each byte accessibility was measured for
+ // STOREV32+perf tests and was slowing down all perf tests.
+ // The cases corresponding to cleverness 2 are marked below as
+ // "direct mod".
+ if (LIKELY(V_BITS8_DEFINED == vbits8)) {
+ if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
+ return; // defined on defined
+ }
+ if (!is_distinguished_sm(sm)
+ && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
+ // direct mod
insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
- &(sm->vabits8[sm_off]) );
- } else if (V_BITS8_UNDEFINED == vbits8) {
+ &(sm->vabits8[sm_off]) );
+ return;
+ }
+ PROF_EVENT(232, "mc_STOREV8-slow2");
+ mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
+ return;
+ }
+ if (V_BITS8_UNDEFINED == vbits8) {
+ if (vabits8 == VA_BITS8_UNDEFINED) {
+ return; // undefined on undefined
+ }
+ if (!is_distinguished_sm(sm)
+ && (VA_BITS2_NOACCESS
+ != extract_vabits2_from_vabits8(a, vabits8))) {
+ // direct mod
insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
- &(sm->vabits8[sm_off]) );
- } else {
- /* Slow but general case -- writing partially defined bytes. */
- PROF_EVENT(272, "mc_STOREV8-slow2");
- mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
+ &(sm->vabits8[sm_off]) );
+ return;
}
- } else {
- /* Slow but general case. */
- PROF_EVENT(273, "mc_STOREV8-slow3");
+ PROF_EVENT(233, "mc_STOREV8-slow3");
mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
+ return;
}
+
+ // Partially defined word
+ PROF_EVENT(234, "mc_STOREV8-slow4");
+ mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
}
#endif
}
|