You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
1
(8) |
2
(11) |
3
(21) |
4
(15) |
5
(10) |
|
6
(7) |
7
(7) |
8
(5) |
9
(7) |
10
(5) |
11
(1) |
12
(21) |
|
13
(8) |
14
(17) |
15
(6) |
16
(10) |
17
(7) |
18
(6) |
19
(15) |
|
20
(12) |
21
(16) |
22
(25) |
23
(14) |
24
(10) |
25
(7) |
26
(6) |
|
27
(34) |
28
(13) |
29
(10) |
30
(8) |
|
|
|
|
From: Nicholas N. <nj...@ca...> - 2004-06-02 20:43:29
|
CVS commit by nethercote:
Fix wishlist item 82098, thanks to Ralf Wildenhues:
ANSIfication of the hp2ps code. The most important changes are the correct
use of the stdarg mechanism (former hacks could bite on other systems, so
please tell upstream), inclusion of stdlib.h instead of declaring free
yourself, adding a few missed PROTO()s and using size_t for xmalloc and
xrealloc.:
M +1 -2 AreaBelow.c 1.3
M +2 -1 Curves.c 1.3
M +1 -2 Deviation.c 1.3
M +14 -10 Error.c 1.3 [POSSIBLY UNSAFE: printf]
M +3 -3 Error.h 1.3
M +1 -2 Scale.c 1.3
M +1 -2 TopTwenty.c 1.3
M +1 -2 TraceElement.c 1.3
M +2 -2 Utilities.c 1.4
M +2 -2 Utilities.h 1.3
--- valgrind/massif/hp2ps/AreaBelow.c #1.2:1.3
@@ -4,4 +4,5 @@
#include <stdio.h>
+#include <stdlib.h>
#include "Main.h"
#include "Defines.h"
@@ -13,6 +14,4 @@
#include "AreaBelow.h"
-extern void free();
-
/*
* Return the area enclosed by all of the curves. The algorithm
--- valgrind/massif/hp2ps/Curves.c #1.2:1.3
@@ -21,5 +21,6 @@ static floatish *g_py; /* previous y va
static void Curve PROTO((struct entry *)); /* forward */
-static void ShadeCurve(); /* forward */
+static void ShadeCurve
+ PROTO((floatish *x, floatish *y, floatish *py, floatish shade));
void
--- valgrind/massif/hp2ps/Deviation.c #1.2:1.3
@@ -4,4 +4,5 @@
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <math.h>
@@ -12,6 +13,4 @@
#include "Utilities.h"
-extern void free();
-
/* own stuff */
#include "Deviation.h"
--- valgrind/massif/hp2ps/Error.c #1.2:1.3
@@ -3,5 +3,7 @@
This program is governed by the license contained in the file LICENSE. */
+#include <stdarg.h>
#include <stdio.h>
+#include <stdlib.h>
#include "Main.h"
#include "Defines.h"
@@ -10,14 +12,14 @@
#include "Error.h"
-void exit PROTO((int));
-
/*VARARGS0*/
void
-Error(a1,a2,a3,a4)
- char* a1; char* a2; char* a3; char* a4;
+Error(const char *fmt, ...)
{
+ va_list ap;
fflush(stdout);
fprintf(stderr, "%s: ", programname);
- fprintf(stderr, a1, a2, a3, a4);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
fprintf(stderr, "\n");
exit(1);
@@ -26,11 +28,13 @@ Error(a1,a2,a3,a4)
/*VARARGS0*/
void
-Disaster(a1,a2,a3,a4)
- char* a1; char* a2; char* a3; char* a4;
+Disaster(const char *fmt, ...)
{
+ va_list ap;
fflush(stdout);
fprintf(stderr, "%s: ", programname);
fprintf(stderr, " Disaster! (");
- fprintf(stderr, a1, a2, a3, a4);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
fprintf(stderr, ")\n");
exit(1);
@@ -39,5 +43,5 @@ Disaster(a1,a2,a3,a4)
void
Usage(str)
- char *str;
+ const char *str;
{
if (str) printf("error: %s\n", str);
--- valgrind/massif/hp2ps/Error.h #1.2:1.3
@@ -6,7 +6,7 @@
#define ERROR_H
-extern void Error (); /*PROTO((char *, ...)); */
-extern void Disaster (); /* PROTO((char *, ...)); */
-extern void Usage (); /* PROTO((char *)); */
+extern void Error PROTO((const char *, ...));
+extern void Disaster PROTO((const char *, ...));
+extern void Usage PROTO((const char *));
#endif /* ERROR_H */
--- valgrind/massif/hp2ps/Scale.c #1.2:1.3
@@ -4,4 +4,5 @@
#include <stdio.h>
+#include <stdlib.h>
#include "Main.h"
#include "Defines.h"
@@ -21,6 +22,4 @@
*/
-extern void free();
-
floatish
MaxCombinedHeight()
--- valgrind/massif/hp2ps/TopTwenty.c #1.2:1.3
@@ -4,4 +4,5 @@
#include <stdio.h>
+#include <stdlib.h>
#include "Main.h"
#include "Defines.h"
@@ -22,6 +23,4 @@
*/
-extern void free();
-
void
TopTwenty()
--- valgrind/massif/hp2ps/TraceElement.c #1.2:1.3
@@ -4,4 +4,5 @@
#include <stdio.h>
+#include <stdlib.h>
#include "Main.h"
#include "Defines.h"
@@ -21,6 +22,4 @@
*/
-extern void free();
-
extern floatish thresholdpercent;
--- valgrind/massif/hp2ps/Utilities.c #1.3:1.4
@@ -84,5 +84,5 @@ CommaPrint(fp,n)
void *
xmalloc(n)
- int n;
+ size_t n;
{
void *r;
@@ -99,5 +99,5 @@ void *
xrealloc(p, n)
void *p;
- int n;
+ size_t n;
{
void *r;
--- valgrind/massif/hp2ps/Utilities.h #1.2:1.3
@@ -12,6 +12,6 @@ void CommaPrint PROTO((FILE *, intish)
char *copystring PROTO((char *));
char *copystring2 PROTO((char *, char *));
-void *xmalloc PROTO((int));
-void *xrealloc PROTO((void *, int));
+void *xmalloc PROTO((size_t));
+void *xrealloc PROTO((void *, size_t));
#endif /* UTILITIES_H */
|
|
From: Nicholas N. <nj...@ca...> - 2004-06-02 20:26:22
|
CVS commit by nethercote: Added Xsupplicant. M +3 -0 users.html 1.67 --- devel-home/valgrind/users.html #1.66:1.67 @@ -336,4 +336,7 @@ <dt><a href="http://www.nomachine.com/developers.php">NX</a> <dd>X Window compression software. + +<dt><a href="http://www.open1x.org">Xsupplicant</a> +<dd>An 802.1X client for Linux. </dl> |
|
From: Stephen M.
|
I thought you might be interested to hear about a Valgrind-based tool our research group just released the first version of. Named Kvasir, after the Norse god of beet juice (and knowledge), it constructs a trace file giving the values of the parameters to each function call, and their return values (plus pointed-to values, to a limited depth). We use this trace information as input to Daikon, our tool for dynamically detecting likely program invariants, but the file format is ASCII and fairly general, so it can also be used for other sorts of dynamic analysis. Kvasir uses Valgrind to get control at function entries and exits, and Memcheck to check whether pointers are valid before following them. In future versions, we also plan to use Memcheck's information to determine the extent of dynamically allocated arrays. We get information about names and types from DWARF-2 debugging sections in the binary. Leveraging Valgrind's infrastructure has been amazingly helpful in getting this project together: after one undergraduate-semester's worth of effort, we already have a tool that works better than our old, source-based instrumenter does after years of effort. Thanks a bunch. Kvasir is GPL'ed, and our first release is available in source form as part of the main Daikon distribution, at http://pag.csail.mit.edu/daikon/download/ This first version is a bit roughly assembled, being based on the Sparrow skin, parts of Memcheck, and the GNU Binutils "readelf" program, assembled with some modifications essentially by cut and paste (along with a slightly modified version of the Valgrind core, circa 2.1.1) together with our own code. In the future, we'd like to use resources from Valgrind in a more principled way, ideally making a tool that could be dropped into an unmodified Valgrind distribution. 
We expect there are only a few places in the rest of Valgrind that would need to be more flexible to accommodate this (for instance, we don't want Memcheck's V bits to be set to "valid" after an invalid access). Kvasir's main developer is at another job over the summer, though, so we won't be working on that until the fall. Thanks again for the great tool, Stephen McCamant Graduate Student MIT CSAIL Program Analysis Group |
|
From: Nicholas N. <nj...@ca...> - 2004-06-02 14:48:22
|
CVS commit by nethercote:
Added "repne movs", not official but seems to occur. Also restructured the
rep/repe/repne cases to use a switch instead of if/else, as it's cleaner.
M +47 -34 vg_to_ucode.c 1.138
--- valgrind/coregrind/vg_to_ucode.c #1.137:1.138
@@ -6208,5 +6208,6 @@ static Addr disInstr ( UCodeBlock* cb, A
break;
- case 0xF2: { /* REPNE prefix insn */
+ /* REPNE prefix insn */
+ case 0xF2: {
Addr eip_orig = eip - 1;
vg_assert(sorb == 0);
@@ -6212,12 +6213,27 @@ static Addr disInstr ( UCodeBlock* cb, A
vg_assert(sorb == 0);
abyte = getUChar(eip); eip++;
+
if (abyte == 0x66) { sz = 2; abyte = getUChar(eip); eip++; }
+ *isEnd = True;
- if (abyte == 0xAE || abyte == 0xAF) { /* REPNE SCAS<sz> */
- if (abyte == 0xAE) sz = 1;
+ switch (abyte) {
+ /* According to the Intel manual, "repne movs" should never occur, but
+ * in practice it has happened, so allow for it here... */
+ case 0xA4: sz = 1; /* REPNE MOVS<sz> */
+ case 0xA5:
+ dis_REP_op ( cb, CondNZ, dis_MOVS, sz, eip_orig, eip, "repne movs" );
+ break;
+
+ case 0xA6: sz = 1; /* REPNE CMPS<sz> */
+ case 0xA7:
+ dis_REP_op ( cb, CondNZ, dis_CMPS, sz, eip_orig, eip, "repne cmps" );
+ break;
+
+ case 0xAE: sz = 1; /* REPNE SCAS<sz> */
+ case 0xAF:
dis_REP_op ( cb, CondNZ, dis_SCAS, sz, eip_orig, eip, "repne scas" );
- *isEnd = True;
- }
- else {
+ break;
+
+ default:
goto decode_failure;
}
@@ -6233,44 +6249,41 @@ static Addr disInstr ( UCodeBlock* cb, A
if (abyte == 0x66) { sz = 2; abyte = getUChar(eip); eip++; }
+ *isEnd = True;
- if (abyte == 0xA4 || abyte == 0xA5) { /* REP MOV<sz> */
- if (abyte == 0xA4) sz = 1;
+ switch (abyte) {
+ case 0xA4: sz = 1; /* REP MOVS<sz> */
+ case 0xA5:
dis_REP_op ( cb, CondAlways, dis_MOVS, sz, eip_orig, eip, "rep movs" );
- *isEnd = True;
- }
- else
- if (abyte == 0xA6 || abyte == 0xA7) { /* REPE CMP<sz> */
- if (abyte == 0xA6) sz = 1;
+ break;
+
+ case 0xA6: sz = 1; /* REPE CMP<sz> */
+ case 0xA7:
dis_REP_op ( cb, CondZ, dis_CMPS, sz, eip_orig, eip, "repe cmps" );
- *isEnd = True;
- }
- else
- if (abyte == 0xAA || abyte == 0xAB) { /* REP STOS<sz> */
- if (abyte == 0xAA) sz = 1;
+ break;
+
+ case 0xAA: sz = 1; /* REP STOS<sz> */
+ case 0XAB:
dis_REP_op ( cb, CondAlways, dis_STOS, sz, eip_orig, eip, "rep stos" );
- *isEnd = True;
- }
- else
- if (abyte == 0xAE || abyte == 0xAF) { /* REPE SCAS<sz> */
- if (abyte == 0xAE) sz = 1;
+ break;
+
+ case 0xAE: sz = 1; /* REPE SCAS<sz> */
+ case 0xAF:
dis_REP_op ( cb, CondZ, dis_SCAS, sz, eip_orig, eip, "repe scas" );
- *isEnd = True;
- }
- else
- if (abyte == 0x90) { /* REP NOP (PAUSE) */
+ break;
+
+ case 0x90: /* REP NOP (PAUSE) */
/* a hint to the P4 re spin-wait loop */
DIP("rep nop (P4 pause)\n");
jmp_lit(cb, eip);
LAST_UINSTR(cb).jmpkind = JmpYield;
- *isEnd = True;
- }
- else
- if (abyte == 0xC3) { /* REP RET */
+ break;
+
+ case 0xC3: /* REP RET */
/* AMD K7/K8-specific optimisation; faster than vanilla RET */
dis_ret(cb, 0);
- *isEnd = True;
DIP("rep ret\n");
- }
- else {
+ break;
+
+ default:
goto decode_failure;
}
|
|
From: Josef W. <Jos...@gm...> - 2004-06-02 14:41:26
|
On Wednesday 02 June 2004 15:56, Nicholas Nethercote wrote: > I think fixing it by changing Valgrind's core (ie. modifying BB layout) > is a bad idea -- I don't want to introduce nasty special cases into the > core just so Cachegrind can handle REP prefixes slightly more cleanly. Of course. The correction scheme I proposed seems the best one here, but I don't care if the special handling is removed (and would apply it to calltree as well). As you said: there are a few other minor shortcomings. > > Yes, perhaps that's the easiest: I don't think this JIFZ special case > > makes any big differences in the result anyway. Have you done any > > experiments regarding REP prefixes and the results from real hardware > > counters for "instructions retired"? > > No. And if we measure, this is for sure processor specific... > (I found another inaccuracy in the simulation today -- CMPS does two data > accesses but Cachegrind only treats it as one. Again, only minor...) And the common coding sequence call x x: pop ax to get the instruction pointer is attributed with 1 instruction fetch only, as it is recoded in valgrind core as a register move. Josef |
|
From: Nicholas N. <nj...@ca...> - 2004-06-02 13:56:44
|
On Wed, 2 Jun 2004, Josef Weidendorfer wrote: > > 0x810120D7: rep stosl > > > > 17: CALLM_So > > 18: MOVL $0x0, t12 > > 19: PUSHL t12 > > 20: CALLMo $0xC6 (-rD) > > 21: POPL t12 > > 22: CALLM_Eo > > 23: SHLL $0x2, t12 > > 24: GETL %ECX, t14 > > <insert I-cache access here> > > 25: JIFZL t14, $0x810120D9 > > 26: DECL t14 > > 27: PUTL t14, %ECX > > 28: GETL %EAX, t16 > > 29: GETL %EDI, t18 > > 30: STL t16, (t18) > > 31: ADDL t12, t18 > > 32: PUTL t18, %EDI > > <insert D-cache access here> > > 33: JMPo $0x810120D7 > > > > > > I thought that putting the I-cache access before the JIFZ meant it would > > only be done once, whereas the D-cache access would be done N times. I > > now realise that is wrong; both will be done N times (an "N*I+N*D" > > model). I can't see how the 1*I+N*D model can be done without making big > > changes to the structure of basic blocks in the presence of REP prefixes. > > Isn't it actually "(N+1)*I+N*D" currently, i.e. always 1 instruction fetch > more than data fetches? Hmm, yes. > To correct this, a way would be to have 2 basic blocks for 1 instruction: One > with the instruction fetch, and 1 in the conditional loop with the data > fetch. Am I correct here? > As any instruction with a REP prefix has a size >1 byte, could we artifically > introduce 2 basic blocks? In the example above this would be one instrumented > block for 0x810120d7 (with the call to the instruction fetch), and one for > 0x810120d8 (with the data fetch). The problem here is of course that one can > not switch to the real processor at this point. > > So another idea: A flag to store if the instruction fetch was already done. > Also quite difficult and errorprone: when to reset the flag? > > Another idea: Correct the error afterwards: subtract the number of data > accesses from the number of instruction fetches... I think fixing it by changing Valgrind's core (ie. 
modifying BB layout) is a bad idea -- I don't want to introduce nasty special cases into the core just so Cachegrind can handle REP prefixes slightly more cleanly. Doing it in Cachegrind is much preferable, but I still can't see how to do that without it being a big pain. > > In which case, maybe the N*I/N*D model is ok. > > Yes, perhaps that's the easiest: I don't think this JIFZ special case makes > any big differences in the result anyway. Have you done any experiments > regarding REP prefixes and the results from real hardware counters for > "instructions retired"? No. > > Then there's one extra complication -- because the JIFZ can exit the basic > > block, putting the instrumentation at the end means that the last > > execution may not be simulated (this is also the case with the current > > method). A more precise approach would be to put the instrumentation > > before the JIFZ, although this would take effort. (A similar thing is > > true for the jecxz instruction, which is translated using JIFZ.) > > I don't see this problem. When CX==0, there is nothing to do (jumping out of > the basic block). Oh yeah, you're right; in this case no data accesses are occurring. I was wrong: the current JIFZ handling gives (N+1)*I + N*D. Removing the special case would give N*I + N*D. So they are different. Does anyone care? (I found another inaccuracy in the simulation today -- CMPS does two data accesses but Cachegrind only treats it as one. Again, only minor...) Thanks. N |
|
From: Josef W. <Jos...@gm...> - 2004-06-02 13:35:43
|
[forgot the mailing list] Hi Nick, On Wednesday 02 June 2004 14:45, Nicholas Nethercote wrote: > Hi all (and particularly Josef), > > I've been looking at Cachegrind, and realised that the JIFZ handling is > broken. > > Ages ago (September 2000) I put in some special handling for JIFZ, which > is used for REP-prefixed instructions. The idea was meant to be this: > since the REP prefix allows one instruction to do many accesses, the best > way to model it in the cache simulation is as if its execution causes 1 > I-cache access, but N D-cache accesses (a "1*I+N*D" model). Who knows if > modern machines actually do this, but it seemed a reasonable idea. I thought/think that this model is OK, and copied your implementation... > However, I just realised the way I implemented it was wrong. Here's what > the instrumented currently added looks like: > > > 0x810120D7: rep stosl > > 17: CALLM_So > 18: MOVL $0x0, t12 > 19: PUSHL t12 > 20: CALLMo $0xC6 (-rD) > 21: POPL t12 > 22: CALLM_Eo > 23: SHLL $0x2, t12 > 24: GETL %ECX, t14 > <insert I-cache access here> > 25: JIFZL t14, $0x810120D9 > 26: DECL t14 > 27: PUTL t14, %ECX > 28: GETL %EAX, t16 > 29: GETL %EDI, t18 > 30: STL t16, (t18) > 31: ADDL t12, t18 > 32: PUTL t18, %EDI > <insert D-cache access here> > 33: JMPo $0x810120D7 > > > I thought that putting the I-cache access before the JIFZ meant it would > only be done once, whereas the D-cache access would be done N times. I > now realise that is wrong; both will be done N times (an "N*I+N*D" > model). I can't see how the 1*I+N*D model can be done without making big > changes to the structure of basic blocks in the presence of REP prefixes. Isn't it actually "(N+1)*I+N*D" currently, i.e. always 1 instruction fetch more than data fetches? To correct this, a way would be to have 2 basic blocks for 1 instruction: One with the instruction fetch, and 1 in the conditional loop with the data fetch. Am I correct here? 
As any instruction with a REP prefix has a size >1 byte, could we artifically introduce 2 basic blocks? In the example above this would be one instrumented block for 0x810120d7 (with the call to the instruction fetch), and one for 0x810120d8 (with the data fetch). The problem here is of course that one can not switch to the real processor at this point. So another idea: A flag to store if the instruction fetch was already done. Also quite difficult and errorprone: when to reset the flag? Another idea: Correct the error afterwards: subtract the number of data accesses from the number of instruction fetches... > In which case, maybe the N*I/N*D model is ok. The easy solution is to > accept this, get rid of the special case, and just do both parts at the > end. This makes is really easy to do, just removes about 110 lines of > code. (And the behaviour would be identical to what we currently have > anyway). Yes, perhaps that's the easiest: I don't think this JIFZ special case makes any big differences in the result anyway. Have you done any experiments regarding REP prefixes and the results from real hardware counters for "instructions retired"? > Then there's one extra complication -- because the JIFZ can exit the basic > block, putting the instrumentation at the end means that the last > execution may not be simulated (this is also the case with the current > method). A more precise approach would be to put the instrumentation > before the JIFZ, although this would take effort. (A similar thing is > true for the jecxz instruction, which is translated using JIFZ.) I don't see this problem. When CX==0, there is nothing to do (jumping out of the basic block). Why are we losing here the last execution? Or does this problem appear if we get rid of the special casing? Josef > Anyone have any comments about all this? > > N > > > ------------------------------------------------------- > This SF.Net email is sponsored by the new InstallShield X. 
> From Windows to Linux, servers to mobile, InstallShield X is the one > installation-authoring solution that does it all. Learn more and > evaluate today! http://www.installshield.com/Dev2Dev/0504 > _______________________________________________ > Valgrind-developers mailing list > Val...@li... > https://lists.sourceforge.net/lists/listinfo/valgrind-developers |
|
From: Nicholas N. <nj...@ca...> - 2004-06-02 12:45:49
|
Hi all (and particularly Josef),
I've been looking at Cachegrind, and realised that the JIFZ handling is
broken.
Ages ago (September 2000) I put in some special handling for JIFZ, which
is used for REP-prefixed instructions. The idea was meant to be this:
since the REP prefix allows one instruction to do many accesses, the best
way to model it in the cache simulation is as if its execution causes 1
I-cache access, but N D-cache accesses (a "1*I+N*D" model). Who knows if
modern machines actually do this, but it seemed a reasonable idea.
However, I just realised the way I implemented it was wrong. Here's what
the instrumentation currently added looks like:
0x810120D7: rep stosl
17: CALLM_So
18: MOVL $0x0, t12
19: PUSHL t12
20: CALLMo $0xC6 (-rD)
21: POPL t12
22: CALLM_Eo
23: SHLL $0x2, t12
24: GETL %ECX, t14
<insert I-cache access here>
25: JIFZL t14, $0x810120D9
26: DECL t14
27: PUTL t14, %ECX
28: GETL %EAX, t16
29: GETL %EDI, t18
30: STL t16, (t18)
31: ADDL t12, t18
32: PUTL t18, %EDI
<insert D-cache access here>
33: JMPo $0x810120D7
I thought that putting the I-cache access before the JIFZ meant it would
only be done once, whereas the D-cache access would be done N times. I
now realise that is wrong; both will be done N times (an "N*I+N*D"
model). I can't see how the 1*I+N*D model can be done without making big
changes to the structure of basic blocks in the presence of REP prefixes.
In which case, maybe the N*I/N*D model is ok. The easy solution is to
accept this, get rid of the special case, and just do both parts at the
end. This makes it really easy to do, just removes about 110 lines of
code. (And the behaviour would be identical to what we currently have
anyway).
Then there's one extra complication -- because the JIFZ can exit the basic
block, putting the instrumentation at the end means that the last
execution may not be simulated (this is also the case with the current
method). A more precise approach would be to put the instrumentation
before the JIFZ, although this would take effort. (A similar thing is
true for the jecxz instruction, which is translated using JIFZ.)
Anyone have any comments about all this?
N
|
|
From: Jeremy F. <je...@go...> - 2004-06-01 21:09:39
|
On Tue, 2004-06-01 at 09:46 +0100, Tom Hughes wrote: > The code to setup the signal frame in arch/i386/kernel/signal.c is > doing this: > > restorer = current->mm->context.vdso + (long)&__kernel_sigreturn; > > Now if vdso is set to zero then current->mm->context.vdso will be null > as no vdso will have been allocated. Hence restorer will be in zero > page which seems rather nasty. I'm a bit surprised anything at all > works with vdso's turned off in fact. Oh, I see how it works. It actually uses the sa_restorer in sigaction, to point to an alternative function __restore_rt: 0x550113f0 <__restore_rt+0>: mov $__NR_rt_sigreturn,%eax 0x550113f5 <__restore_rt+5>: int $0x80 0x550113f7 <__restore_rt+7>: nop This should be pretty easy to duplicate. J |
|
From: Jeremy F. <je...@go...> - 2004-06-01 20:38:59
|
On Tue, 2004-06-01 at 09:46 +0100, Tom Hughes wrote: > In message <1086068817.2794.7.camel@localhost.localdomain> > Jeremy Fitzhardinge <je...@go...> wrote: > > > The initial problem is caused by VDSOs, which are placed low in the > > address space. When Valgrind clears out the client area in stage2, it > > also clears out the sysinfo page, which happens to be where the munmap > > syscall returns to... > > I believe they are actually placed at a random address, or so the > kernel source claims. It's random within the range 0x00111000-0x01000000, I think. > The code to setup the signal frame in arch/i386/kernel/signal.c is > doing this: > > restorer = current->mm->context.vdso + (long)&__kernel_sigreturn; > > Now if vdso is set to zero then current->mm->context.vdso will be null > as no vdso will have been allocated. Hence restorer will be in zero > page which seems rather nasty. I'm a bit surprised anything at all > works with vdso's turned off in fact. Yes that's pretty much exactly what I was expecting to see. I guess people don't run with vdsos off. I'm trying to get a little standalone program to fail in the same way, but it seems to work for reasons I don't understand yet. J |
|
From: Tom H. <th...@cy...> - 2004-06-01 08:46:41
|
In message <1086068817.2794.7.camel@localhost.localdomain>
Jeremy Fitzhardinge <je...@go...> wrote:
> The initial problem is caused by VDSOs, which are placed low in the
> address space. When Valgrind clears out the client area in stage2, it
> also clears out the sysinfo page, which happens to be where the munmap
> syscall returns to...
I believe they are actually placed at a random address, or so the
kernel source claims.
> So, if you turn off VDSOs (echo 0 > /proc/sys/kernel/vdso), you get to
> the next crash. This is a fair bit further on, typically when the
> client wants to expand the stack. It gets a fault, calls our SIGSEGV
> handler, which expands the stack, and then returns from the handler with
> the intent of restarting the faulting instruction - BANG. The handler
> returns to address 0x440 and explodes. I haven't confirmed this yet,
> but it looks to me like a FC2 kernel bug. I'm guessing that when it
> sets up the signal frame return address, it uses the sysinfo syscall
> return address, which is sysinfo_page + 0x440 - and since the sysinfo
> page hasn't been installed, it's just 0x440.
The code to setup the signal frame in arch/i386/kernel/signal.c is
doing this:
restorer = current->mm->context.vdso + (long)&__kernel_sigreturn;
Now if vdso is set to zero then current->mm->context.vdso will be null
as no vdso will have been allocated. Hence restorer will be in zero
page which seems rather nasty. I'm a bit surprised anything at all
works with vdso's turned off in fact.
Tom
--
Tom Hughes (th...@cy...)
Software Engineer, Cyberscience Corporation
http://www.cyberscience.com/
|
|
From: Tom H. <th...@cy...> - 2004-06-01 08:43:33
|
In message <Pin...@ye...>
Nicholas Nethercote <nj...@ca...> wrote:
> On Mon, 31 May 2004, Jeremy Fitzhardinge wrote:
>
>> The initial problem is caused by VDSOs, which are placed low in the
>> address space.
>
> What are VDSOs?
From looking at the kernel source I think it's just another name
for the sysinfo page - the map_vsyscall routine basically does nothing
if the vdso flag is turned off.
>> Either way, this is pretty tricky to deal with. We basically have to
>> leave the sysinfo page alone, even though it's sitting in the middle of
>> the client address space. We can't unmap it, because the signal
>> delivery machinery will still keep trying to use it.
>
> Is it the sysinfo page for stage1 or stage2? Where does the sysinfo page
> normally live -- I thought it was at a really high address?
Well there is only one sysinfo page because it is allocated by the
kernel on a per-process basis, so it effectively belongs to the stage1
valgrind process.
We currently then hide it from the simulated program so that it falls
back to using old style system calls. That should also be what happens
once the kernel.vdso sysctl is set to zero or if you boot with vdso=0.
Tom
--
Tom Hughes (th...@cy...)
Software Engineer, Cyberscience Corporation
http://www.cyberscience.com/
|
|
From: Nicholas N. <nj...@ca...> - 2004-06-01 08:09:03
|
On Mon, 31 May 2004, Jeremy Fitzhardinge wrote: > The initial problem is caused by VDSOs, which are placed low in the > address space. What are VDSOs? > Either way, this is pretty tricky to deal with. We basically have to > leave the sysinfo page alone, even though it's sitting in the middle of > the client address space. We can't unmap it, because the signal > delivery machinery will still keep trying to use it. Is it the sysinfo page for stage1 or stage2? Where does the sysinfo page normally live -- I thought it was at a really high address? N |
|
From: Jeremy F. <je...@go...> - 2004-06-01 05:47:00
|
On Sat, 2004-05-29 at 12:13 +0100, Tom Hughes wrote: > Has anybody actually had valgrind running on FC2 yet? > > I don't seem to be able to get it to do anything more that SEGV on > startup at the moment, and I can't even attach gdb to it as it just > seems to hang... I built my own gdb 6 from source, and it seems to work - the FC2 standard one hangs for me too. I got some insight into what's happening. The initial problem is caused by VDSOs, which are placed low in the address space. When Valgrind clears out the client area in stage2, it also clears out the sysinfo page, which happens to be where the munmap syscall returns to... So, if you turn off VDSOs (echo 0 > /proc/sys/kernel/vdso), you get to the next crash. This is a fair bit further on, typically when the client wants to expand the stack. It gets a fault, calls our SIGSEGV handler, which expands the stack, and then returns from the handler with the intent of restarting the faulting instruction - BANG. The handler returns to address 0x440 and explodes. I haven't confirmed this yet, but it looks to me like a FC2 kernel bug. I'm guessing that when it sets up the signal frame return address, it uses the sysinfo syscall return address, which is sysinfo_page + 0x440 - and since the sysinfo page hasn't been installed, it's just 0x440. Either way, this is pretty tricky to deal with. We basically have to leave the sysinfo page alone, even though it's sitting in the middle of the client address space. We can't unmap it, because the signal delivery machinery will still keep trying to use it. J |
|
From: <js...@ac...> - 2004-06-01 02:55:57
|
Nightly build on phoenix ( SuSE 8.2 ) started at 2004-06-01 04:00:00 BST Checking out source tree ... done Configuring ... done Building ... done Running regression tests ... done Last 20 lines of log.verbose follow syscall-restart2: valgrind ./syscall-restart2 system: valgrind ./system yield: valgrind ./yield -- Finished tests in none/tests ---------------------------------------- == 153 tests, 12 stderr failures, 0 stdout failures ================= corecheck/tests/as_mmap (stderr) corecheck/tests/fdleak_cmsg (stderr) corecheck/tests/fdleak_creat (stderr) corecheck/tests/fdleak_dup (stderr) corecheck/tests/fdleak_dup2 (stderr) corecheck/tests/fdleak_fcntl (stderr) corecheck/tests/fdleak_ipv4 (stderr) corecheck/tests/fdleak_open (stderr) corecheck/tests/fdleak_pipe (stderr) corecheck/tests/fdleak_socketpair (stderr) memcheck/tests/writev (stderr) memcheck/tests/zeropage (stderr) make: *** [regtest] Error 1 |
|
From: <js...@ac...> - 2004-06-01 02:27:06
|
Nightly build on nemesis ( SuSE 9.0 ) started at 2004-06-01 03:50:00 BST Checking out source tree ... done Configuring ... done Building ... done Running regression tests ... done Last 20 lines of log.verbose follow syscall-restart2: valgrind ./syscall-restart2 system: valgrind ./system yield: valgrind ./yield -- Finished tests in none/tests ---------------------------------------- == 153 tests, 12 stderr failures, 0 stdout failures ================= corecheck/tests/as_mmap (stderr) corecheck/tests/fdleak_cmsg (stderr) corecheck/tests/fdleak_creat (stderr) corecheck/tests/fdleak_dup (stderr) corecheck/tests/fdleak_dup2 (stderr) corecheck/tests/fdleak_fcntl (stderr) corecheck/tests/fdleak_ipv4 (stderr) corecheck/tests/fdleak_open (stderr) corecheck/tests/fdleak_pipe (stderr) corecheck/tests/fdleak_socketpair (stderr) memcheck/tests/writev (stderr) memcheck/tests/zeropage (stderr) make: *** [regtest] Error 1 |