You can subscribe to this list here.
| 2002 | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep (1) | Oct (122) | Nov (152) | Dec (69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 | Jan (6) | Feb (25) | Mar (73) | Apr (82) | May (24) | Jun (25) | Jul (10) | Aug (11) | Sep (10) | Oct (54) | Nov (203) | Dec (182) |
| 2004 | Jan (307) | Feb (305) | Mar (430) | Apr (312) | May (187) | Jun (342) | Jul (487) | Aug (637) | Sep (336) | Oct (373) | Nov (441) | Dec (210) |
| 2005 | Jan (385) | Feb (480) | Mar (636) | Apr (544) | May (679) | Jun (625) | Jul (810) | Aug (838) | Sep (634) | Oct (521) | Nov (965) | Dec (543) |
| 2006 | Jan (494) | Feb (431) | Mar (546) | Apr (411) | May (406) | Jun (322) | Jul (256) | Aug (401) | Sep (345) | Oct (542) | Nov (308) | Dec (481) |
| 2007 | Jan (427) | Feb (326) | Mar (367) | Apr (255) | May (244) | Jun (204) | Jul (223) | Aug (231) | Sep (354) | Oct (374) | Nov (497) | Dec (362) |
| 2008 | Jan (322) | Feb (482) | Mar (658) | Apr (422) | May (476) | Jun (396) | Jul (455) | Aug (267) | Sep (280) | Oct (253) | Nov (232) | Dec (304) |
| 2009 | Jan (486) | Feb (470) | Mar (458) | Apr (423) | May (696) | Jun (461) | Jul (551) | Aug (575) | Sep (134) | Oct (110) | Nov (157) | Dec (102) |
| 2010 | Jan (226) | Feb (86) | Mar (147) | Apr (117) | May (107) | Jun (203) | Jul (193) | Aug (238) | Sep (300) | Oct (246) | Nov (23) | Dec (75) |
| 2011 | Jan (133) | Feb (195) | Mar (315) | Apr (200) | May (267) | Jun (293) | Jul (353) | Aug (237) | Sep (278) | Oct (611) | Nov (274) | Dec (260) |
| 2012 | Jan (303) | Feb (391) | Mar (417) | Apr (441) | May (488) | Jun (655) | Jul (590) | Aug (610) | Sep (526) | Oct (478) | Nov (359) | Dec (372) |
| 2013 | Jan (467) | Feb (226) | Mar (391) | Apr (281) | May (299) | Jun (252) | Jul (311) | Aug (352) | Sep (481) | Oct (571) | Nov (222) | Dec (231) |
| 2014 | Jan (185) | Feb (329) | Mar (245) | Apr (238) | May (281) | Jun (399) | Jul (382) | Aug (500) | Sep (579) | Oct (435) | Nov (487) | Dec (256) |
| 2015 | Jan (338) | Feb (357) | Mar (330) | Apr (294) | May (191) | Jun (108) | Jul (142) | Aug (261) | Sep (190) | Oct (54) | Nov (83) | Dec (22) |
| 2016 | Jan (49) | Feb (89) | Mar (33) | Apr (50) | May (27) | Jun (34) | Jul (53) | Aug (53) | Sep (98) | Oct (206) | Nov (93) | Dec (53) |
| 2017 | Jan (65) | Feb (82) | Mar (102) | Apr (86) | May (187) | Jun (67) | Jul (23) | Aug (93) | Sep (65) | Oct (45) | Nov (35) | Dec (17) |
| 2018 | Jan (26) | Feb (35) | Mar (38) | Apr (32) | May (8) | Jun (43) | Jul (27) | Aug (30) | Sep (43) | Oct (42) | Nov (38) | Dec (67) |
| 2019 | Jan (32) | Feb (37) | Mar (53) | Apr (64) | May (49) | Jun (18) | Jul (14) | Aug (53) | Sep (25) | Oct (30) | Nov (49) | Dec (31) |
| 2020 | Jan (87) | Feb (45) | Mar (37) | Apr (51) | May (99) | Jun (36) | Jul (11) | Aug (14) | Sep (20) | Oct (24) | Nov (40) | Dec (23) |
| 2021 | Jan (14) | Feb (53) | Mar (85) | Apr (15) | May (19) | Jun (3) | Jul (14) | Aug (1) | Sep (57) | Oct (73) | Nov (56) | Dec (22) |
| 2022 | Jan (3) | Feb (22) | Mar (6) | Apr (55) | May (46) | Jun (39) | Jul (15) | Aug (9) | Sep (11) | Oct (34) | Nov (20) | Dec (36) |
| 2023 | Jan (79) | Feb (41) | Mar (99) | Apr (169) | May (48) | Jun (16) | Jul (16) | Aug (57) | Sep (19) | Oct | Nov | Dec |
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|  |  |  | 1 (30) | 2 (8) | 3 (5) | 4 (5) |
| 5 (3) | 6 (9) | 7 (5) | 8 (14) | 9 (17) | 10 (27) | 11 (10) |
| 12 (6) | 13 (10) | 14 (7) | 15 (16) | 16 (9) | 17 (14) | 18 (8) |
| 19 (5) | 20 (13) | 21 (21) | 22 (13) | 23 (4) | 24 (1) | 25 (4) |
| 26 (2) | 27 (7) | 28 (4) | 29 (5) | 30 (12) |  |  |
From: Mark P. <pa...@un...> - 2015-04-12 23:19:15
|
Hi there, It looks like valgrind HEAD is busted for those of us living on bleeding 10.10.
The two main issues are that the configure.ac file is a bit too strict for Apple LLVM checking, and someone made a breaking change to the initimg interfaces without making sure to un-bust initimg-darwin.c
The following patch worked like a charm for myself:
Index: configure.ac
===================================================================
--- configure.ac (revision 15085)
+++ configure.ac (working copy)
@@ -154,7 +154,7 @@
# Note: m4 arguments are quoted with [ and ] so square brackets in shell
# statements have to be quoted.
case "${is_clang}-${gcc_version}" in
- applellvm-5.1|applellvm-6.0*)
+ applellvm-5.1|applellvm-6.*)
AC_MSG_RESULT([ok (Apple LLVM version ${gcc_version})])
;;
icc-1[[3-9]].*)
Index: coregrind/m_initimg/initimg-darwin.c
===================================================================
--- coregrind/m_initimg/initimg-darwin.c (revision 15085)
+++ coregrind/m_initimg/initimg-darwin.c (working copy)
@@ -312,7 +312,8 @@
HChar** orig_envp,
const ExeInfo* info,
Addr clstack_end,
- SizeT clstack_max_size )
+ SizeT clstack_max_size,
+ const VexArchInfo* vex_archinfo )
{
HChar **cpp;
HChar *strtab; /* string table */
@@ -508,7 +509,8 @@
/*====================================================================*/
/* Create the client's initial memory image. */
-IIFinaliseImageInfo VG_(ii_create_image)( IICreateImageInfo iicii )
+IIFinaliseImageInfo VG_(ii_create_image)( IICreateImageInfo iicii,
+ const VexArchInfo* vex_archinfo )
{
ExeInfo info;
VG_(memset)( &info, 0, sizeof(info) );
@@ -548,7 +550,8 @@
iifii.initial_client_SP =
setup_client_stack( iicii.argv - 1, env, &info,
- iicii.clstack_end, iifii.clstack_max_size );
+ iicii.clstack_end, iifii.clstack_max_size,
+ vex_archinfo );
VG_(free)(env);
thanks,
-Pauley
|
|
From: Ivo R. <iv...@iv...> - 2015-04-12 19:45:03
|
2015-04-09 22:03 GMT+02:00 Philippe Waroquiers < phi...@sk...>: > On Thu, 2015-04-09 at 21:58 +0200, Florian Krohm wrote: > > If you want to beat me at it feel free to do so :) > This crashes in a very special case, no urgency :) > I have a patch including test cases ready for review in https://bugs.kde.org/show_bug.cgi?id=345887 After all, it was not so difficult. I. |
|
From: Matthias S. <zz...@ge...> - 2015-04-12 19:01:15
|
On 10.04.2015 20:56, Philippe Waroquiers wrote: > On Fri, 2015-04-10 at 06:47 +0200, Matthias Schwarzott wrote: > >> The check "if (fp_min + 256 >= fp_max)" in coregrind/m_stacktrace.c:501 >> is triggered here. >> >> By changing it to "if (fp_min + 128 >= fp_max)" it can be fixed. >> >> I think amd64 is having problems here because some functions do not need >> additional local variables but can use the redzone, so the stackframes >> are small. > Thanks for this analysis. > Some days ago, I looked at the history of this check. > I saw that the value was already decreased in the past for amd64. > This check is also disabled for Darwin. > > For x86/ppc32/ppc64/arm/arm64, the value is still 512. > > s390x/mips32/mips64/tilegx have no such condition. > > > It is not clear to me what is the purpose of this check. > I did not find an explanation in the svn history. > > I am wondering if that had not as objective to avoid SEGV for bogus > stack pointers. > There was some recent work done to avoid SEGV (e.g. by obtaining > better stack limits for unwinding). > > So, I am not sure that these checks are still useful. > Rather, they might only harm in case we have small but valid > stack frames. > Maybe the number should be dropped in total. Then only the check "fp_min >= fp_max" would remain. Even if I tried, I did not get the whole idea of how stack registering and (main) thread extension works. Regards Matthias |
|
From: Matthias S. <zz...@ge...> - 2015-04-12 18:54:01
|
Hi there!
When executing valgrind automatically on a server, I sometimes wonder if
a process did finish successfully or did call abort (or was killed in
some other way).
When running valgrind with option "-v" it prints this:
==10481== Process terminating with default action of signal 6 (SIGABRT)
==10481== at 0x5085137: kill (syscall-template.S:81)
==10481== by 0x40081B: main (gone.c:26)
But "-v" is too verbose for normal runs :)
So I suggest to always write this and additionally to also do it for xml
output.
I did some experiments. What do you think about something like this in
the xml file:
<fatal_signal>
<tid>1</tid>
<signo>6</signo>
<signame>SIGABRT</signame>
<stack>
<frame>
<ip>0x5084137</ip>
<obj>/lib64/libc-2.20.so</obj>
<fn>kill</fn>
<dir>/var/tmp/portage/sys-libs/glibc-2.20-r2/work/glibc-2.20/signal/../sysdeps/unix</dir>
<file>syscall-template.S</file>
<line>81</line>
</frame>
<frame>
<ip>0x40081B</ip>
<obj>/home/matze/development/valgrind.git/gdbserver_tests/gone</obj>
<fn>main</fn>
<dir>/home/matze/development/valgrind.git/gdbserver_tests</dir>
<file>gone.c</file>
<line>26</line>
</frame>
</stack>
</fatal_signal>
Regards
Matthias
|
Author: sewardj
Date: Sun Apr 12 10:23:58 2015
New Revision: 3126
Log:
Tidyups, no functional change:
* Create RRegSets for caller-saved and callee-saved registers on
amd64, so as to create a single point of reference for that info.
Plumb to use sites.
* Pull out and abstractify logic to compute the set of registers
to spill around NCode calls (calcRegistersToPreserveAroundNCodeCall)
so it becomes arch neutral and move it to host_generic_regs.c.
* fix stupid error in RRegSet__fromVec
Modified:
branches/NCODE/priv/host_amd64_defs.c
branches/NCODE/priv/host_amd64_defs.h
branches/NCODE/priv/host_amd64_isel.c
branches/NCODE/priv/host_generic_regs.c
branches/NCODE/priv/host_generic_regs.h
branches/NCODE/priv/main_main.c
branches/NCODE/priv/main_util.h
Modified: branches/NCODE/priv/host_amd64_defs.c
==============================================================================
--- branches/NCODE/priv/host_amd64_defs.c (original)
+++ branches/NCODE/priv/host_amd64_defs.c Sun Apr 12 10:23:58 2015
@@ -101,6 +101,75 @@
}
+/* Returns the registers in the AMD64 universe that are caller saved.
+ This is really ABI dependent, but we ignore that detail here. */
+static const RRegSet* getRRegsCallerSaved_AMD64 ( void )
+{
+ /* In theory gcc should be able to fold this into a single 64 bit
+ constant (bitset). But that's a bit risky, so instead do
+ thread-unsafe lazy initialisation (sigh). */
+ static RRegSet callerSavedRegs;
+ static Bool callerSavedRegs_initted = False;
+
+ if (LIKELY(callerSavedRegs_initted))
+ return &callerSavedRegs;
+
+ RRegSet__init(&callerSavedRegs, getRRegUniverse_AMD64());
+
+ RRegSet__add(&callerSavedRegs, hregAMD64_RAX());
+ RRegSet__add(&callerSavedRegs, hregAMD64_RCX());
+ RRegSet__add(&callerSavedRegs, hregAMD64_RDX());
+ RRegSet__add(&callerSavedRegs, hregAMD64_RSI());
+ RRegSet__add(&callerSavedRegs, hregAMD64_RDI());
+ RRegSet__add(&callerSavedRegs, hregAMD64_R8());
+ RRegSet__add(&callerSavedRegs, hregAMD64_R9());
+ RRegSet__add(&callerSavedRegs, hregAMD64_R10());
+ RRegSet__add(&callerSavedRegs, hregAMD64_R11());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM0());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM1());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM3());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM4());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM5());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM6());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM7());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM8());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM9());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM10());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM11());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM12());
+
+ callerSavedRegs_initted = True;
+ return &callerSavedRegs;
+}
+
+
+/* Returns the registers in the AMD64 universe that are callee saved.
+ This is really ABI dependent, but we ignore that detail here. */
+static const RRegSet* getRRegsCalleeSaved_AMD64 ( void )
+{
+ /* In theory gcc should be able to fold this into a single 64 bit
+ constant (bitset). But that's a bit risky, so instead do
+ thread-unsafe lazy initialisation (sigh). */
+ static RRegSet calleeSavedRegs;
+ static Bool calleeSavedRegs_initted = False;
+
+ if (LIKELY(calleeSavedRegs_initted))
+ return &calleeSavedRegs;
+
+ RRegSet__init(&calleeSavedRegs, getRRegUniverse_AMD64());
+
+ RRegSet__add(&calleeSavedRegs, hregAMD64_RBX());
+ RRegSet__add(&calleeSavedRegs, hregAMD64_RBP());
+ RRegSet__add(&calleeSavedRegs, hregAMD64_R12());
+ RRegSet__add(&calleeSavedRegs, hregAMD64_R13());
+ RRegSet__add(&calleeSavedRegs, hregAMD64_R14());
+ RRegSet__add(&calleeSavedRegs, hregAMD64_R15());
+
+ calleeSavedRegs_initted = True;
+ return &calleeSavedRegs;
+}
+
+
void ppHRegAMD64 ( HReg reg )
{
Int r;
@@ -1548,31 +1617,9 @@
/* This is a bit subtle. */
/* First off, claim it trashes all the caller-saved regs
which fall within the register allocator's jurisdiction.
- These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
- and all the xmm registers.
+ These I believe to be: rsi rdi r8 r9 r10 xmm3..xmm12.
*/
- addHRegUse(u, HRmWrite, hregAMD64_RAX());
- addHRegUse(u, HRmWrite, hregAMD64_RCX());
- addHRegUse(u, HRmWrite, hregAMD64_RDX());
- addHRegUse(u, HRmWrite, hregAMD64_RSI());
- addHRegUse(u, HRmWrite, hregAMD64_RDI());
- addHRegUse(u, HRmWrite, hregAMD64_R8());
- addHRegUse(u, HRmWrite, hregAMD64_R9());
- addHRegUse(u, HRmWrite, hregAMD64_R10());
- addHRegUse(u, HRmWrite, hregAMD64_R11());
- addHRegUse(u, HRmWrite, hregAMD64_XMM0());
- addHRegUse(u, HRmWrite, hregAMD64_XMM1());
- addHRegUse(u, HRmWrite, hregAMD64_XMM3());
- addHRegUse(u, HRmWrite, hregAMD64_XMM4());
- addHRegUse(u, HRmWrite, hregAMD64_XMM5());
- addHRegUse(u, HRmWrite, hregAMD64_XMM6());
- addHRegUse(u, HRmWrite, hregAMD64_XMM7());
- addHRegUse(u, HRmWrite, hregAMD64_XMM8());
- addHRegUse(u, HRmWrite, hregAMD64_XMM9());
- addHRegUse(u, HRmWrite, hregAMD64_XMM10());
- addHRegUse(u, HRmWrite, hregAMD64_XMM11());
- addHRegUse(u, HRmWrite, hregAMD64_XMM12());
-
+ addHRegUse_from_RRegSet(u, HRmWrite, getRRegsCallerSaved_AMD64());
/* Now we have to state any parameter-carrying registers
which might be read. This depends on the regparmness. */
switch (i->Ain.Call.regparms) {
@@ -3981,251 +4028,9 @@
so it's already out of commission as far as regalloc is concerned.
So we can safely use it here, when needed. */
-/* A handy structure to hold the register environment. */
-typedef
- struct {
- UInt nRegsR;
- const HReg* regsR;
- UInt nRegsA;
- const HReg* regsA;
- UInt nRegsS;
- const HReg* regsS;
- }
- NRegMap;
-
-/* fwds */
-static void emit_AMD64NInstr ( /*MOD*/AssemblyBuffer* ab,
- /*MOD*/RelocationBuffer* rb,
- const NInstr* ni,
- const NRegMap* nregMap,
- const RRegSet* rrLiveAfter,
- /* for debug printing only */
- Bool verbose, NLabel niLabel );
-
-static UInt hregVecLen ( const HReg* vec )
-{
- UInt i;
- for (i = 0; !hregIsInvalid(vec[i]); i++)
- ;
- return i;
-}
-
-/* Generate the AMD64 NCode instruction |hi| into |ab_hot| and
- |ab_cold|. This can only handle NCode blocks. All other AMD64
- instructions are to be handled by emit_AMD64Instr. This is
- required to generate <= 1024 bytes of code. Returns True if OK,
- False if not enough buffer space. */
-
-Bool emit_AMD64NCode ( /*MOD*/AssemblyBuffer* ab_hot,
- /*MOD*/AssemblyBuffer* ab_cold,
- /*MOD*/RelocationBuffer* rb,
- const AMD64Instr* hi,
- Bool mode64, VexEndness endness_host,
- Bool verbose )
-{
- vassert(mode64 == True);
- vassert(endness_host == VexEndnessLE);
- vassert(hi->tag == Ain_NCode);
-
- const AMD64InstrNCode* hi_details = hi->Ain.NCode.details;
- const NCodeTemplate* tmpl = hi_details->tmpl;
- const RRegSet* rregsLiveAfter = hi_details->rrLiveAfter;
- const RRegUniverse* univ = RRegSet__getUniverse(rregsLiveAfter);
-
- NRegMap nregMap;
- nregMap.regsR = hi_details->regsR;
- nregMap.regsA = hi_details->regsA;
- nregMap.regsS = hi_details->regsS;
- nregMap.nRegsR = tmpl->nres;
- nregMap.nRegsA = tmpl->narg;
- nregMap.nRegsS = tmpl->nscr;
-
- vassert(hregVecLen(nregMap.regsR) == nregMap.nRegsR);
- vassert(hregVecLen(nregMap.regsA) == nregMap.nRegsA);
- vassert(hregVecLen(nregMap.regsS) == nregMap.nRegsS);
-
- if (AssemblyBuffer__getRemainingSize(ab_hot) < 1024)
- return False;
- if (AssemblyBuffer__getRemainingSize(ab_cold) < 1024)
- return False;
- if (RelocationBuffer__getRemainingSize(rb) < 128)
- return False;
-
- /* Count how many hot and cold instructions (NInstrs) the template
- has, since we'll need to allocate temporary arrays to keep track
- of the label offsets. */
- UInt nHot, nCold;
- for (nHot = 0; tmpl->hot[nHot]; nHot++)
- ;
- for (nCold = 0; tmpl->cold[nCold]; nCold++)
- ;
-
- /* Here are our two arrays for tracking the AssemblyBuffer offsets
- of the NCode instructions. */
- UInt i;
- UInt offsetsHot[nHot];
- UInt offsetsCold[nCold];
- for (i = 0; i < nHot; i++) offsetsHot[i] = 0;
- for (i = 0; i < nCold; i++) offsetsCold[i] = 0;
-
- /* We'll be adding entries to the relocation buffer, |rb|, and will
- need to adjust their |dst| fields after generation of the hot
- and cold code. Record therefore where we are in the buffer now,
- so that we can iterate over the new entries later. */
- UInt rb_first = RelocationBuffer__getNext(rb);
-
- /* Generate the hot code */
- for (i = 0; i < nHot; i++) {
- offsetsHot[i] = AssemblyBuffer__getNext(ab_hot);
- NLabel lbl = mkNLabel(Nlz_Hot, i);
- emit_AMD64NInstr(ab_hot, rb, tmpl->hot[i], &nregMap,
- rregsLiveAfter, verbose, lbl);
- }
-
- /* And the cold code */
- for (i = 0; i < nCold; i++) {
- offsetsCold[i] = AssemblyBuffer__getNext(ab_cold);
- NLabel lbl = mkNLabel(Nlz_Cold, i);
- emit_AMD64NInstr(ab_cold, rb, tmpl->cold[i], &nregMap,
- rregsLiveAfter, verbose, lbl);
- }
-
- /* Now visit the new relocation entries. */
- UInt rb_last1 = RelocationBuffer__getNext(rb);
-
- for (i = rb_first; i < rb_last1; i++) {
- Relocation* reloc = &rb->buf[i];
-
- /* Show the reloc before the label-to-offset transformation. */
- if (verbose) {
- vex_printf(" reloc: ");
- ppRelocation(reloc);
- vex_printf("\n");
- }
-
- /* Transform the destination component of |reloc| so that it no
- longer refers to a label but rather to an offset in the hot
- or cold assembly buffer. */
- vassert(!reloc->dst.isOffset);
- reloc->dst.isOffset = True;
-
- if (reloc->dst.zone == Nlz_Hot) {
- vassert(reloc->dst.num < nHot);
- reloc->dst.num = offsetsHot[reloc->dst.num];
- } else {
- vassert(reloc->dst.zone == Nlz_Cold);
- vassert(reloc->dst.num < nCold);
- reloc->dst.num = offsetsCold[reloc->dst.num];
- }
-
- /* Show the reloc after the label-to-offset transformation. */
- if (verbose) {
- vex_printf(" reloc: ");
- ppRelocation(reloc);
- vex_printf("\n");
- }
- }
-
- if (0) {
- HReg r10 = hregAMD64_R10();
- HReg rax = hregAMD64_RAX();
- HReg rbx = hregAMD64_RBX();
- HReg rcx = hregAMD64_RCX();
- HReg rdx = hregAMD64_RDX();
-
- RRegSet* rs = RRegSet__new(univ);
- vex_printf("\n__new\n");
- vex_printf("1: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- vex_printf("\n__add\n");
- RRegSet__add(rs, rbx);
- vex_printf("2: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__add(rs, rdx);
- vex_printf("3: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__add(rs, rcx);
- vex_printf("4: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__add(rs, rcx);
- vex_printf("5: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__add(rs, r10);
- vex_printf("6: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__add(rs, rax);
- vex_printf("7: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- vex_printf("\n__fromVec\n");
- const HReg vec[4] = { rdx, rcx, rbx, rax };
- RRegSet__fromVec(rs, vec, 0);
- vex_printf("8: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__fromVec(rs, vec, 4);
- vex_printf("9: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- vex_printf("\n__del\n");
- RRegSet__del(rs, rcx);
- vex_printf("10: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__del(rs, rcx);
- vex_printf("11: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__del(rs, rbx);
- vex_printf("12: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__del(rs, rax);
- vex_printf("13: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__del(rs, rdx);
- vex_printf("14: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__del(rs, rdx);
- vex_printf("15: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
-
- vex_printf("\n__plus\n");
- RRegSet* rs2 = RRegSet__new(univ);
- RRegSet__add(rs, r10); RRegSet__add(rs, rax);
- RRegSet__add(rs2, rbx); RRegSet__add(rs2, rcx); RRegSet__add(rs2, rax);
-
- RRegSet__plus(rs2, rs);
- vex_printf("16a: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
- vex_printf("16b: "); RRegSet__pp(rs2, ppHRegAMD64); vex_printf("\n");
-
- vex_printf("\n__minus\n");
- RRegSet__minus(rs, rs2);
- vex_printf("17: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- }
-
- return True;
-}
-
-/* Find the real (hard) register for |r| by looking up in |map|. */
-static HReg mapNReg ( const NRegMap* map, NReg r )
-{
- UInt limit = 0;
- const HReg* arr = NULL;
- switch (r.role) {
- case Nrr_Result: limit = map->nRegsR; arr = map->regsR; break;
- case Nrr_Argument: limit = map->nRegsA; arr = map->regsA; break;
- case Nrr_Scratch: limit = map->nRegsS; arr = map->regsS; break;
- default: vpanic("mapNReg: invalid reg role");
- }
- vassert(r.num < limit);
- return arr[r.num];
-}
-
-/* ***FIXME*** this is an exact copy of the same in host_amd64_isel.c. */
-static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
-{
- vassert(hregClass(src) == HRcInt64);
- vassert(hregClass(dst) == HRcInt64);
- return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
-}
-
-
+/* Emits AMD64 code for a single NInstr |ni| into |ab|, possibly
+ adding relocation information into |rb| too.
+*/
static
void emit_AMD64NInstr ( /*MOD*/AssemblyBuffer* ab,
/*MOD*/RelocationBuffer* rb,
@@ -4298,78 +4103,15 @@
}
case Nin_Call: {
- /* The main difficulty here is to figure out the minimal set
- of registers to save across the call. As far as I can see, the
- set is:
-
- (1) registers live after this NCode block
- (2) + the Arg, Res and Scratch registers for this block
- (3) - Abi_Callee_Saved registers
- (4) - the Arg/Res/Scratch register(s) into which this call
- will place its results
-
- (1) because that's the set of regs that reg-alloc expects to
- not be trashed by the NCode block
- (2) because Arg/Res/Scratch regs can be used freely within the
- NCode block, so we have to keep them alive
- (3) because preserving Callee saved regs is obviously pointless
- (4) because preserving the call's result reg(s) will result in
- the restore sequence overwriting the result of the call
-
- Figuring out (1) is tricky and is something that reg-alloc
- needs to tell us. I think it's safe to start with an
- overestimate of (1) -- for example, all regs available to
- reg-alloc -- and refine it later.
- */
- const RRegUniverse* univ = RRegSet__getUniverse(hregsLiveAfter);
- const RRegSet* set_1 = hregsLiveAfter;
-
- RRegSet* set_2 = RRegSet__new(univ);
- { UInt i;
- for (i = 0; i < nregMap->nRegsR; i++)
- RRegSet__add(set_2, nregMap->regsR[i]);
- for (i = 0; i < nregMap->nRegsA; i++)
- RRegSet__add(set_2, nregMap->regsA[i]);
- for (i = 0; i < nregMap->nRegsS; i++)
- RRegSet__add(set_2, nregMap->regsS[i]);
- }
-
- RRegSet* set_3 = RRegSet__new(univ);
- // callee-saves: rbx rbp r12 r13 r14 r15
- { HReg vec[6];
- vec[0] = hregAMD64_RBX(); vec[1] = hregAMD64_RBP();
- vec[2] = hregAMD64_R12(); vec[3] = hregAMD64_R13();
- vec[4] = hregAMD64_R14(); vec[5] = hregAMD64_R15();
- RRegSet__fromVec(set_3, vec, sizeof(vec)/sizeof(vec[0]));
- }
-
- RRegSet* set_4 = RRegSet__new(univ);
- if (!isNRegINVALID(ni->Nin.Call.resHi))
- RRegSet__add(set_4, mapNReg(nregMap, ni->Nin.Call.resHi));
- if (!isNRegINVALID(ni->Nin.Call.resLo))
- RRegSet__add(set_4, mapNReg(nregMap, ni->Nin.Call.resLo));
-
- RRegSet* to_preserve = RRegSet__new(univ);
- RRegSet__copy(to_preserve, set_1);
- RRegSet__plus(to_preserve, set_2);
- RRegSet__minus(to_preserve, set_3);
- RRegSet__minus(to_preserve, set_4);
-
- if (verbose) {
- vex_printf(" # set1: ");
- RRegSet__pp(set_1, ppHRegAMD64); vex_printf("\n");
- vex_printf(" # set2: ");
- RRegSet__pp(set_2, ppHRegAMD64); vex_printf("\n");
- vex_printf(" # set3: ");
- RRegSet__pp(set_3, ppHRegAMD64); vex_printf("\n");
- vex_printf(" # set4: ");
- RRegSet__pp(set_4, ppHRegAMD64); vex_printf("\n");
- vex_printf(" # pres: ");
- RRegSet__pp(to_preserve, ppHRegAMD64); vex_printf("\n");
- }
+ RRegSet to_preserve;
+ calcRegistersToPreserveAroundNCodeCall(
+ &to_preserve,
+ hregsLiveAfter, getRRegsCalleeSaved_AMD64(), nregMap,
+ ni->Nin.Call.resHi, ni->Nin.Call.resLo
+ );
/* Save live regs */
- UInt n_to_preserve = RRegSet__card(to_preserve);
+ UInt n_to_preserve = RRegSet__card(&to_preserve);
vassert(n_to_preserve < 25); /* stay sane */
/* Figure out how much to move the stack, ensuring any alignment up
@@ -4382,7 +4124,7 @@
}
RRegSetIterator* iter = RRegSetIterator__new();
- RRegSetIterator__init(iter, to_preserve);
+ RRegSetIterator__init(iter, &to_preserve);
UInt slotNo = 0;
while (True) {
HReg r = RRegSetIterator__next(iter);
@@ -4426,7 +4168,7 @@
}
/* Restore live regs */
- RRegSetIterator__init(iter, to_preserve);
+ RRegSetIterator__init(iter, &to_preserve);
slotNo = 0;
while (True) {
HReg r = RRegSetIterator__next(iter);
@@ -4582,6 +4324,127 @@
}
+/* Emits AMD64 code for the complete NCode block |hi| into |ab_hot|
+ and |ab_cold|, possibly adding relocation information to |rb| too.
+ This function can only handle NCode blocks. All other AMD64
+ instructions are to be handled by emit_AMD64Instr. This function
+ is required to generate <= 1024 bytes of code. Returns True if OK,
+ False if not enough buffer space.
+*/
+Bool emit_AMD64NCodeBlock ( /*MOD*/AssemblyBuffer* ab_hot,
+ /*MOD*/AssemblyBuffer* ab_cold,
+ /*MOD*/RelocationBuffer* rb,
+ const AMD64Instr* hi,
+ Bool mode64, VexEndness endness_host,
+ Bool verbose )
+{
+ vassert(mode64 == True);
+ vassert(endness_host == VexEndnessLE);
+ vassert(hi->tag == Ain_NCode);
+
+ const AMD64InstrNCode* hi_details = hi->Ain.NCode.details;
+ const NCodeTemplate* tmpl = hi_details->tmpl;
+ const RRegSet* rregsLiveAfter = hi_details->rrLiveAfter;
+ const RRegUniverse* univ = RRegSet__getUniverse(rregsLiveAfter);
+
+ NRegMap nregMap;
+ nregMap.regsR = hi_details->regsR;
+ nregMap.regsA = hi_details->regsA;
+ nregMap.regsS = hi_details->regsS;
+ nregMap.nRegsR = tmpl->nres;
+ nregMap.nRegsA = tmpl->narg;
+ nregMap.nRegsS = tmpl->nscr;
+
+ vassert(hregVecLen(nregMap.regsR) == nregMap.nRegsR);
+ vassert(hregVecLen(nregMap.regsA) == nregMap.nRegsA);
+ vassert(hregVecLen(nregMap.regsS) == nregMap.nRegsS);
+
+ if (AssemblyBuffer__getRemainingSize(ab_hot) < 1024)
+ return False;
+ if (AssemblyBuffer__getRemainingSize(ab_cold) < 1024)
+ return False;
+ if (RelocationBuffer__getRemainingSize(rb) < 128)
+ return False;
+
+ /* Count how many hot and cold instructions (NInstrs) the template
+ has, since we'll need to allocate temporary arrays to keep track
+ of the label offsets. */
+ UInt nHot, nCold;
+ for (nHot = 0; tmpl->hot[nHot]; nHot++)
+ ;
+ for (nCold = 0; tmpl->cold[nCold]; nCold++)
+ ;
+
+ /* Here are our two arrays for tracking the AssemblyBuffer offsets
+ of the NCode instructions. */
+ UInt i;
+ UInt offsetsHot[nHot];
+ UInt offsetsCold[nCold];
+ for (i = 0; i < nHot; i++) offsetsHot[i] = 0;
+ for (i = 0; i < nCold; i++) offsetsCold[i] = 0;
+
+ /* We'll be adding entries to the relocation buffer, |rb|, and will
+ need to adjust their |dst| fields after generation of the hot
+ and cold code. Record therefore where we are in the buffer now,
+ so that we can iterate over the new entries later. */
+ UInt rb_first = RelocationBuffer__getNext(rb);
+
+ /* Generate the hot code */
+ for (i = 0; i < nHot; i++) {
+ offsetsHot[i] = AssemblyBuffer__getNext(ab_hot);
+ NLabel lbl = mkNLabel(Nlz_Hot, i);
+ emit_AMD64NInstr(ab_hot, rb, tmpl->hot[i], &nregMap,
+ rregsLiveAfter, verbose, lbl);
+ }
+
+ /* And the cold code */
+ for (i = 0; i < nCold; i++) {
+ offsetsCold[i] = AssemblyBuffer__getNext(ab_cold);
+ NLabel lbl = mkNLabel(Nlz_Cold, i);
+ emit_AMD64NInstr(ab_cold, rb, tmpl->cold[i], &nregMap,
+ rregsLiveAfter, verbose, lbl);
+ }
+
+ /* Now visit the new relocation entries. */
+ UInt rb_last1 = RelocationBuffer__getNext(rb);
+
+ for (i = rb_first; i < rb_last1; i++) {
+ Relocation* reloc = &rb->buf[i];
+
+ /* Show the reloc before the label-to-offset transformation. */
+ if (verbose) {
+ vex_printf(" reloc: ");
+ ppRelocation(reloc);
+ vex_printf("\n");
+ }
+
+ /* Transform the destination component of |reloc| so that it no
+ longer refers to a label but rather to an offset in the hot
+ or cold assembly buffer. */
+ vassert(!reloc->dst.isOffset);
+ reloc->dst.isOffset = True;
+
+ if (reloc->dst.zone == Nlz_Hot) {
+ vassert(reloc->dst.num < nHot);
+ reloc->dst.num = offsetsHot[reloc->dst.num];
+ } else {
+ vassert(reloc->dst.zone == Nlz_Cold);
+ vassert(reloc->dst.num < nCold);
+ reloc->dst.num = offsetsCold[reloc->dst.num];
+ }
+
+ /* Show the reloc after the label-to-offset transformation. */
+ if (verbose) {
+ vex_printf(" reloc: ");
+ ppRelocation(reloc);
+ vex_printf("\n");
+ }
+ }
+
+ return True;
+}
+
+
/* --------- Helpers for translation chaining. --------- */
/* How big is an event check? See case for Ain_EvCheck in
Modified: branches/NCODE/priv/host_amd64_defs.h
==============================================================================
--- branches/NCODE/priv/host_amd64_defs.h (original)
+++ branches/NCODE/priv/host_amd64_defs.h Sun Apr 12 10:23:58 2015
@@ -830,6 +830,9 @@
extern void ppAMD64Instr ( const AMD64Instr*, Bool );
+/* Handy helper, for generating integer reg-reg moves. */
+extern AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst );
+
/* Some functions that insulate the register allocator from details
of the underlying instruction set. */
extern void getRegUsage_AMD64Instr ( HRegUsage*, const AMD64Instr*, Bool );
@@ -839,12 +842,12 @@
const AMD64Instr*, Bool, VexEndness,
const VexDispatcherAddresses* );
-extern Bool emit_AMD64NCode ( /*MOD*/AssemblyBuffer* ab_hot,
- /*MOD*/AssemblyBuffer* ab_cold,
- /*MOD*/RelocationBuffer* rb,
- const AMD64Instr* hi,
- Bool mode64, VexEndness endness_host,
- Bool verbose );
+extern Bool emit_AMD64NCodeBlock ( /*MOD*/AssemblyBuffer* ab_hot,
+ /*MOD*/AssemblyBuffer* ab_cold,
+ /*MOD*/RelocationBuffer* rb,
+ const AMD64Instr* hi,
+ Bool mode64, VexEndness endness_host,
+ Bool verbose );
extern void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Bool spRel, Int offset, Bool );
Modified: branches/NCODE/priv/host_amd64_isel.c
==============================================================================
--- branches/NCODE/priv/host_amd64_isel.c (original)
+++ branches/NCODE/priv/host_amd64_isel.c Sun Apr 12 10:23:58 2015
@@ -309,9 +309,9 @@
&& e->Iex.Const.con->Ico.U32 == 0;
}
-/* Make a int reg-reg move. */
+/* Make an int reg-reg move. */
-static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
+/*notstatic*/ AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
vassert(hregClass(src) == HRcInt64);
vassert(hregClass(dst) == HRcInt64);
Modified: branches/NCODE/priv/host_generic_regs.c
==============================================================================
--- branches/NCODE/priv/host_generic_regs.c (original)
+++ branches/NCODE/priv/host_generic_regs.c Sun Apr 12 10:23:58 2015
@@ -120,16 +120,6 @@
/*--- Real register sets ---*/
/*---------------------------------------------------------*/
-/* Represents sets of real registers. |bits| is interpreted in the
- context of |univ|. That is, each bit index |i| in |bits|
- corresponds to the register |univ->regs[i]|. This relies
- entirely on the fact that N_RREGUNIVERSE_REGS <= 64.
-*/
-struct _RRegSet {
- ULong bits;
- const RRegUniverse* univ;
-};
-
STATIC_ASSERT(N_RREGUNIVERSE_REGS <= 8 * sizeof(ULong));
/* Print a register set, using the arch-specific register printing
@@ -153,13 +143,19 @@
vex_printf("}");
}
-/* Create a new, empty, set. */
+/* Initialise an RRegSet, making it empty. */
+inline void RRegSet__init ( /*OUT*/RRegSet* set, const RRegUniverse* univ )
+{
+ set->bits = 0;
+ set->univ = univ;
+}
+
+/* Create a new, empty, set, in the normal (transient) heap. */
RRegSet* RRegSet__new ( const RRegUniverse* univ )
{
vassert(univ);
RRegSet* set = LibVEX_Alloc_inline(sizeof(RRegSet));
- set->bits = 0;
- set->univ = univ;
+ RRegSet__init(set, univ);
return set;
}
@@ -174,6 +170,7 @@
duplicates. */
void RRegSet__fromVec ( /*MOD*/RRegSet* dst, const HReg* vec, UInt nVec )
{
+ dst->bits = 0;
for (UInt i = 0; i < nVec; i++) {
HReg r = vec[i];
vassert(!hregIsInvalid(r) && !hregIsVirtual(r));
@@ -229,6 +226,22 @@
return __builtin_popcountll(set->bits);
}
+/* Remove non-allocatable registers from this set. Because the set
+ carries its register universe, we can consult that to find the
+ non-allocatable registers, so no other parameters are needed. */
+void RRegSet__deleteNonAllocatable ( /*MOD*/RRegSet* set )
+{
+ const RRegUniverse* univ = set->univ;
+ UInt allocable = univ->allocable;
+ if (UNLIKELY(allocable == N_RREGUNIVERSE_REGS)) {
+ return;
+ /* otherwise we'd get an out-of-range shift below */
+ }
+ vassert(allocable > 0 && allocable < N_RREGUNIVERSE_REGS);
+ ULong mask = (1ULL << allocable) - 1;
+ set->bits &= mask;
+}
+
struct _RRegSetIterator {
const RRegSet* set;
@@ -398,6 +411,20 @@
/*NOTREACHED*/
}
+void addHRegUse_from_RRegSet ( HRegUsage* tab,
+ HRegMode mode, const RRegSet* set )
+{
+ STATIC_ASSERT(sizeof(tab->rRead) == sizeof(tab->rWritten));
+ STATIC_ASSERT(sizeof(tab->rRead) == sizeof(set->bits));
+ switch (mode) {
+ case HRmRead: tab->rRead |= set->bits; break;
+ case HRmWrite: tab->rWritten |= set->bits; break;
+ case HRmModify: tab->rRead |= set->bits;
+ tab->rWritten |= set->bits; break;
+ default: vassert(0);
+ }
+}
+
/*---------------------------------------------------------*/
/*--- Indicating register remappings (for reg-alloc) ---*/
@@ -531,6 +558,128 @@
}
+/*---------------------------------------------------------*/
+/*--- NCode generation helpers ---*/
+/*---------------------------------------------------------*/
+
+/* Find the length of a vector of HRegs that is terminated by
+ an HReg_INVALID. */
+UInt hregVecLen ( const HReg* vec )
+{
+ UInt i;
+ for (i = 0; !hregIsInvalid(vec[i]); i++)
+ ;
+ return i;
+}
+
+
+/* Find the real (hard) register for |r| by looking up in |map|. */
+HReg mapNReg ( const NRegMap* map, NReg r )
+{
+ UInt limit = 0;
+ const HReg* arr = NULL;
+ switch (r.role) {
+ case Nrr_Result: limit = map->nRegsR; arr = map->regsR; break;
+ case Nrr_Argument: limit = map->nRegsA; arr = map->regsA; break;
+ case Nrr_Scratch: limit = map->nRegsS; arr = map->regsS; break;
+ default: vpanic("mapNReg: invalid reg role");
+ }
+ vassert(r.num < limit);
+ return arr[r.num];
+}
+
+
+/* Compute the minimal set of registers to preserve around calls
+ embedded within NCode blocks. */
+void calcRegistersToPreserveAroundNCodeCall (
+ /*OUT*/RRegSet* result,
+ const RRegSet* hregsLiveAfterTheNCodeBlock,
+ const RRegSet* abiCallerSavedRegs,
+ const NRegMap* nregMap,
+ NReg nregResHi,
+ NReg nregResLo
+ )
+{
+ /* This function deals with one of the main difficulties of NCode
+ templates, which is that of figuring out the minimal set of
+ registers to save across calls embedded inside NCode blocks. As
+ far as I can see, the set is:
+
+ (1) registers live after the NCode block
+ (2) + the Arg, Res and Scratch registers for the block
+ (3) - Abi_Callee_Saved registers
+ (4) - the Arg/Res/Scratch register(s) into which the call
+ will place its results
+
+ (1) because that's the set of regs that reg-alloc expects to
+ not be trashed by the NCode block
+ (2) because Arg/Res/Scratch regs can be used freely within the
+ NCode block, so we have to keep them alive
+ (3) because preserving Callee saved regs is obviously pointless
+ (4) because preserving the call's result reg(s) will result in
+ the restore sequence overwriting the result of the call
+
+ (2) (3) (4) are either constants or something we can find from
+ inspection of the relevant NInstr (call) alone. (1) is
+ something that depends on instructions after the NCode block
+ and so is something that the register allocator has to tell us.
+
+ Another detail is that we remove from the set, all registers not
+ available to the register allocator. That is, we save across
+ the call, only registers available to the allocator. That
+ assumes that all fixed-use or otherwise-not-allocatable
+ registers, that we care about, are callee-saved. AFAIK the only
+ important register is the baseblock register, and that is indeed
+ callee-saved on all targets.
+ */
+ const RRegUniverse* univ
+ = RRegSet__getUniverse(hregsLiveAfterTheNCodeBlock);
+
+ const RRegSet* set_1 = hregsLiveAfterTheNCodeBlock;
+
+ RRegSet set_2;
+ RRegSet__init(&set_2, univ);
+ for (UInt i = 0; i < nregMap->nRegsR; i++)
+ RRegSet__add(&set_2, nregMap->regsR[i]);
+ for (UInt i = 0; i < nregMap->nRegsA; i++)
+ RRegSet__add(&set_2, nregMap->regsA[i]);
+ for (UInt i = 0; i < nregMap->nRegsS; i++)
+ RRegSet__add(&set_2, nregMap->regsS[i]);
+
+ const RRegSet* set_3 = abiCallerSavedRegs;
+ vassert(univ == RRegSet__getUniverse(set_3));
+
+ RRegSet set_4;
+ RRegSet__init(&set_4, univ);
+ if (!isNRegINVALID(nregResHi))
+ RRegSet__add(&set_4, mapNReg(nregMap, nregResHi));
+ if (!isNRegINVALID(nregResLo))
+ RRegSet__add(&set_4, mapNReg(nregMap, nregResLo));
+
+ RRegSet__init(result, univ);
+ RRegSet__copy(result, set_1);
+ RRegSet__plus(result, &set_2);
+ RRegSet__minus(result, set_3);
+ RRegSet__minus(result, &set_4);
+
+ if (0) {
+ vex_printf(" # set1: ");
+ RRegSet__pp(set_1, ppHReg); vex_printf("\n");
+ vex_printf(" # set2: ");
+ RRegSet__pp(&set_2, ppHReg); vex_printf("\n");
+ vex_printf(" # set3: ");
+ RRegSet__pp(set_3, ppHReg); vex_printf("\n");
+ vex_printf(" # set4: ");
+ RRegSet__pp(&set_4, ppHReg); vex_printf("\n");
+ vex_printf(" # pres: ");
+ RRegSet__pp(result, ppHReg); vex_printf("\n");
+ }
+
+ /* Remove any non allocatable registers (see big comment above) */
+ RRegSet__deleteNonAllocatable(result);
+}
+
+
/*---------------------------------------------------------------*/
/*--- end host_generic_regs.c ---*/
/*---------------------------------------------------------------*/
Modified: branches/NCODE/priv/host_generic_regs.h
==============================================================================
--- branches/NCODE/priv/host_generic_regs.h (original)
+++ branches/NCODE/priv/host_generic_regs.h Sun Apr 12 10:23:58 2015
@@ -238,14 +238,34 @@
/*--- Real Register Sets ---*/
/*---------------------------------------------------------*/
-/* ABSTYPE */
-typedef struct _RRegSet RRegSet;
+/* Represents sets of real registers. |bits| is interpreted in the
+ context of |univ|. That is, each bit index |i| in |bits|
+ corresponds to the register |univ->regs[i]|. This relies
+ entirely on the fact that N_RREGUNIVERSE_REGS <= 64.
+
+ It would have been nice to have been able to make this abstract,
+ but it is necessary to declare globals of this type. Hence the
+ size has to be known to the users of the type and so it can't be
+ abstract.
+*/
+typedef
+ struct {
+ ULong bits;
+ const RRegUniverse* univ;
+ }
+ RRegSet;
+
+STATIC_ASSERT(N_RREGUNIVERSE_REGS <= 8 * sizeof(ULong));
+
/* Print a register set, using the arch-specific register printing
function |regPrinter| supplied. */
extern void RRegSet__pp ( const RRegSet* set, void (*regPrinter)(HReg) );
-/* Create a new, empty, set. */
+/* Initialise an RRegSet, making it empty. */
+extern void RRegSet__init ( /*OUT*/RRegSet* set, const RRegUniverse* univ );
+
+/* Create a new, empty, set, in the normal (transient) heap. */
extern RRegSet* RRegSet__new ( const RRegUniverse* univ );
/* Return the RRegUniverse for a given RRegSet. */
@@ -275,6 +295,11 @@
/* Returns the number of elements in |set|. */
extern UInt RRegSet__card ( const RRegSet* set );
+/* Remove non-allocatable registers from this set. Because the set
+ carries its register universe, we can consult that to find the
+ non-allocatable registers, so no other parameters are needed. */
+extern void RRegSet__deleteNonAllocatable ( /*MOD*/RRegSet* set );
+
/* Iterating over RRegSets. */
/* ABSTYPE */
@@ -344,6 +369,9 @@
extern Bool HRegUsage__contains ( const HRegUsage*, HReg );
+extern void addHRegUse_from_RRegSet ( HRegUsage*, HRegMode, const RRegSet* );
+
+
/*---------------------------------------------------------*/
/*--- Indicating register remappings (for reg-alloc) ---*/
/*---------------------------------------------------------*/
@@ -702,6 +730,46 @@
);
+/*---------------------------------------------------------*/
+/*--- NCode generation helpers ---*/
+/*---------------------------------------------------------*/
+
+/* Find the length of a vector of HRegs that is terminated by
+ an HReg_INVALID. */
+extern UInt hregVecLen ( const HReg* vec );
+
+
+/* A handy structure to hold the register environment for an NCode
+ block -- that is, the NReg to HReg mapping. */
+typedef
+ struct {
+ UInt nRegsR;
+ const HReg* regsR;
+ UInt nRegsA;
+ const HReg* regsA;
+ UInt nRegsS;
+ const HReg* regsS;
+ }
+ NRegMap;
+
+/* Find the real (hard) register for |r| by looking up in |map|. */
+extern HReg mapNReg ( const NRegMap* map, NReg r );
+
+
+/* Compute the minimal set of registers to preserve around calls
+ embedded within NCode blocks. See implementation for a detailed
+ comment. */
+extern
+void calcRegistersToPreserveAroundNCodeCall (
+ /*OUT*/RRegSet* result,
+ const RRegSet* hregsLiveAfterTheNCodeBlock,
+ const RRegSet* abiCallerSavedRegs,
+ const NRegMap* nregMap,
+ NReg nregResHi,
+ NReg nregResLo
+ );
+
+
#endif /* ndef __VEX_HOST_GENERIC_REGS_H */
/*---------------------------------------------------------------*/
Modified: branches/NCODE/priv/main_main.c
==============================================================================
--- branches/NCODE/priv/main_main.c (original)
+++ branches/NCODE/priv/main_main.c Sun Apr 12 10:23:58 2015
@@ -1128,9 +1128,9 @@
if (UNLIKELY( AssemblyBuffer__getRemainingSize(&ab_hot) < 1024 )
|| UNLIKELY( AssemblyBuffer__getRemainingSize(&ab_cold) < 1024 ))
goto outputBufferFull;
- Bool ok = emit_AMD64NCode ( &ab_hot, &ab_cold, &rb, hi,
- mode64, vta->archinfo_host.endness,
- !!(vex_traceflags & VEX_TRACE_ASM));
+ Bool ok = emit_AMD64NCodeBlock ( &ab_hot, &ab_cold, &rb, hi,
+ mode64, vta->archinfo_host.endness,
+ !!(vex_traceflags & VEX_TRACE_ASM));
if (!ok)
goto outputBufferFull;
}
Modified: branches/NCODE/priv/main_util.h
==============================================================================
--- branches/NCODE/priv/main_util.h (original)
+++ branches/NCODE/priv/main_util.h Sun Apr 12 10:23:58 2015
@@ -51,7 +51,8 @@
#endif
// Poor man's static assert
-#define STATIC_ASSERT(x) extern int vex__unused_array[(x) ? 1 : -1]
+#define STATIC_ASSERT(x) extern int vex__unused_array[(x) ? 1 : -1] \
+ __attribute__((unused))
/* Stuff for panicking and assertion. */
|
|
From: Zhigang L. <zl...@ez...> - 2015-04-12 05:24:51
|
________________________________________
From: Philippe Waroquiers <phi...@sk...>
Sent: Saturday, April 11, 2015 7:30 AM
To: Zhigang Liu
Cc: Valgrind Developers
Subject: Re: [Valgrind-developers] I need access to a TILEGX :) : libvexmultiarch_test failing with TILEGX host
On Sat, 2015-04-11 at 01:12 +0200, Philippe Waroquiers wrote:
> On Sat, 2015-04-11 at 01:02 +0200, Philippe Waroquiers wrote:
> > Julian,
> > do you agree that the offB_HOST_* offsets are depending on the host
> > architecture, and not on the guest architecture ?
> Moving the offB_HOST_* to the arch_host switch makes
> guest amd64/host tilegx
> work ok.
>
> It looks to me that this is the good thing to do
After an irc discussion with Julian, it became clear that this
is not the good thing to do, and that I misunderstood
the somewhat misleading names offB_HOST_EvC_COUNTER and
offB_HOST_EvC_FAILADDR.
Here is what I understand now:
These offB_HOST_* are really offset in the guest state,
which give locations in the guest state that are used by the
(generated) host code.
Basically, a translation entry (generated host code) is doing
if ((--guest_state->COUNTER) == 0) goto guest_state->FAILADDR
So, COUNTER and FAILADDR are in the guest state.
FAILADDR must be a host address
(this is in fact wrongly defined in all 32-bit guest states.
E.g. libvex_guest_x86.h and libvex_guest_ppc32.h define
UInt host_EvC_FAILADDR;
while it should be the size of a host address (or at least
big enough to hold a 64-bit host address, if the host were
64 bits in a multiarch setup).
So, now I think the guest amd64/host tilegx problem
is better solved in the host tilegx code, which should ensure that it
always generates the same number of bytes for the evCheck instructions
(this was suggested by Zhigang)
(or maybe dynamically compute
the needed nr of instructions for an eventcheck, depending
on the offsets of the host_EvC_*, that changes the size of the
instructions).
Zhigang, does the above look reasonable to do in tilegx ?
Yes, thank you for finding this issue. I have a simple patch for this; would you mind giving it a try?
Thanks
--- ZhiGang
******* Begin of the patch ******
Index: priv/host_tilegx_defs.c
===================================================================
--- priv/host_tilegx_defs.c (revision 3125)
+++ priv/host_tilegx_defs.c (working copy)
@@ -1348,11 +1348,10 @@
static UChar *doAMode_IR ( UChar * p, UInt opc1, UInt rSD, TILEGXAMode * am )
{
- UInt rA; //, idx;
+ UInt rA;
vassert(am->tag == GXam_IR);
rA = iregNo(am->GXam.IR.base);
- //idx = am->GXam.IR.index;
if (opc1 == TILEGX_OPC_ST1 || opc1 == TILEGX_OPC_ST2 ||
opc1 == TILEGX_OPC_ST4 || opc1 == TILEGX_OPC_ST) {
@@ -1381,19 +1380,29 @@
return p;
}
-/* Generate a machine-word sized load or store. Simplified version of
- the GXin_Load and GXin_Store cases below. */
+/* Generate a machine-word sized load or store using exact 2 bundles.
+ Simplified version of the GXin_Load and GXin_Store cases below. */
static UChar* do_load_or_store_machine_word ( UChar* p, Bool isLoad, UInt reg,
TILEGXAMode* am )
{
+ UInt rA = iregNo(am->GXam.IR.base);
+
if (am->tag != GXam_IR)
vpanic(__func__);
- if (isLoad) /* load */
- p = doAMode_IR(p, TILEGX_OPC_LD, reg, am);
- else /* store */
- p = doAMode_IR(p, TILEGX_OPC_ST, reg, am);
-
+ if (isLoad) /* load */ {
+ /* r51 is reserved scratch registers. */
+ p = mkInsnBin(p, mkTileGxInsn(TILEGX_OPC_ADDLI, 3,
+ 51, rA, am->GXam.IR.index));
+ /* load from address in r51 to rSD. */
+ p = mkInsnBin(p, mkTileGxInsn(TILEGX_OPC_LD, 2, reg, 51));
+ } else /* store */ {
+ /* r51 is reserved scratch registers. */
+ p = mkInsnBin(p, mkTileGxInsn(TILEGX_OPC_ADDLI, 3,
+ 51, rA, am->GXam.IR.index));
+ /* store rSD to address in r51 */
+ p = mkInsnBin(p, mkTileGxInsn(TILEGX_OPC_ST, 2, 51, reg));
+ }
return p;
}
******* END of the patch ******
(While waiting for this to be done, I could always disable
the use of tilegx as a host in the test.)
Thanks
Philippe
|