|
From: <sv...@va...> - 2013-08-08 10:29:20
|
sewardj 2013-08-08 11:28:59 +0100 (Thu, 08 Aug 2013)
New Revision: 2739
Log:
Add infrastructural support (IR, VEX) to allow returns of 128-
and 256-bit values from dirty helper functions, in a way which is
independent of the target ABIs and of compilers generating
correct struct return code.
Is a prereq for bug #294285.
MIPS fixes: Petar Jovanovic, mip...@gm...
S390 fixes: Maran, ma...@li...
Modified files:
trunk/priv/guest_amd64_toIR.c
trunk/priv/guest_mips_defs.h
trunk/priv/guest_mips_helpers.c
trunk/priv/guest_mips_toIR.c
trunk/priv/guest_ppc_toIR.c
trunk/priv/guest_s390_toIR.c
trunk/priv/guest_x86_toIR.c
trunk/priv/host_amd64_defs.c
trunk/priv/host_amd64_isel.c
trunk/priv/host_arm_defs.c
trunk/priv/host_arm_isel.c
trunk/priv/host_generic_regs.c
trunk/priv/host_generic_regs.h
trunk/priv/host_mips_defs.c
trunk/priv/host_mips_isel.c
trunk/priv/host_ppc_defs.c
trunk/priv/host_ppc_isel.c
trunk/priv/host_s390_defs.c
trunk/priv/host_s390_defs.h
trunk/priv/host_s390_isel.c
trunk/priv/host_x86_defs.c
trunk/priv/host_x86_isel.c
trunk/priv/ir_defs.c
trunk/priv/ir_opt.c
trunk/priv/main_main.c
trunk/pub/libvex_ir.h
Modified: trunk/pub/libvex_ir.h (+43 -7)
===================================================================
--- trunk/pub/libvex_ir.h 2013-08-07 10:45:08 +01:00 (rev 2738)
+++ trunk/pub/libvex_ir.h 2013-08-08 11:28:59 +01:00 (rev 2739)
@@ -1853,6 +1853,12 @@
Ist_Dirty inhibits various IR optimisations and so can cause
quite poor code to be generated. Try to avoid it.
+ In principle it would be allowable to have the arg vector
+ contain the special value IRExprP__VECRET, although not
+ IRExprP__BBPTR. However, at the moment there is no
+ requirement for clean helper calls to be able to return V128
+ or V256 values. Hence this is not allowed.
+
ppIRExpr output: <cee>(<args>):<retty>
eg. foo{0x80489304}(t1, t2):I32
*/
@@ -1894,6 +1900,34 @@
IRExpr* arg4; /* operand 4 */
};
+
+/* Two special constants of type IRExpr*, which can ONLY be used in
+ argument lists for dirty helper calls (IRDirty.args) and in NO
+ OTHER PLACES. And then only in very limited ways. These constants
+ are not pointer-aligned and hence can't be confused with real
+ IRExpr*s nor with NULL. */
+
+/* Denotes an argument which (in the helper) takes a pointer to a
+ (naturally aligned) V128 or V256, into which the helper is expected
+ to write its result. Use of IRExprP__VECRET is strictly
+ controlled. If the helper returns a V128 or V256 value then
+ IRExprP__VECRET must appear exactly once in the arg list, although
+ it can appear anywhere, and the helper must have a C 'void' return
+ type. If the helper returns any other type, IRExprP__VECRET may
+ not appear in the argument list. */
+#define IRExprP__VECRET ((IRExpr*)9)
+
+/* Denotes a void* argument which is passed to the helper, which at
+ run time will point to the thread's guest state area. This can
+ only appear at most once in an argument list, and it may not appear
+ at all in argument lists for clean helper calls. */
+#define IRExprP__BBPTR ((IRExpr*)17)
+
+static inline Bool is_IRExprP__VECRET_or_BBPTR ( IRExpr* e ) {
+ return e == IRExprP__VECRET || e == IRExprP__BBPTR;
+}
+
+
/* Expression constructors. */
extern IRExpr* IRExpr_Binder ( Int binder );
extern IRExpr* IRExpr_Get ( Int off, IRType ty );
@@ -2053,11 +2087,12 @@
number of times at a fixed interval, if required.
Normally, code is generated to pass just the args to the helper.
- However, if .needsBBP is set, then an extra first argument is
- passed, which is the baseblock pointer, so that the callee can
- access the guest state. It is invalid for .nFxState to be zero
- but .needsBBP to be True, since .nFxState==0 is a claim that the
- call does not access guest state.
+ However, if IRExprP__BBPTR is present in the argument list (at most
+ one instance is allowed), then the baseblock pointer is passed for
+ that arg, so that the callee can access the guest state. It is
+ invalid for .nFxState to be zero but IRExprP__BBPTR to be present,
+ since .nFxState==0 is a claim that the call does not access guest
+ state.
IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict. The
arguments and 'mFx' are evaluated REGARDLESS of the guard value.
@@ -2092,7 +2127,9 @@
allowed. */
IRCallee* cee; /* where to call */
IRExpr* guard; /* :: Ity_Bit. Controls whether call happens */
- IRExpr** args; /* arg list, ends in NULL */
+ /* The args vector may contain IRExprP__BBPTR and/or
+ IRExprP__VECRET, in both cases, at most once. */
+ IRExpr** args; /* arg vector, ends in NULL. */
IRTemp tmp; /* to assign result to, or IRTemp_INVALID if none */
/* Mem effects; we allow only one R/W/M region to be stated */
@@ -2101,7 +2138,6 @@
Int mSize; /* of access, or zero if mFx==Ifx_None */
/* Guest state effects; up to N allowed */
- Bool needsBBP; /* True => also pass guest state ptr to callee */
Int nFxState; /* must be 0 .. VEX_N_FXSTATE */
struct {
IREffect fx:16; /* read, write or modify? Ifx_None is invalid. */
Modified: trunk/priv/host_x86_isel.c (+224 -90)
===================================================================
--- trunk/priv/host_x86_isel.c 2013-08-07 10:45:08 +01:00 (rev 2738)
+++ trunk/priv/host_x86_isel.c 2013-08-08 11:28:59 +01:00 (rev 2739)
@@ -340,10 +340,23 @@
/* Push an arg onto the host stack, in preparation for a call to a
helper function of some kind. Returns the number of 32-bit words
- pushed. */
-
-static Int pushArg ( ISelEnv* env, IRExpr* arg )
+ pushed. If we encounter an IRExprP__VECRET then we expect that
+ r_vecRetAddr will be a valid register, that holds the relevant
+ address.
+*/
+static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
{
+ if (UNLIKELY(arg == IRExprP__VECRET)) {
+ vassert(0); //ATC
+ vassert(!hregIsInvalid(r_vecRetAddr));
+ addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
+ return 1;
+ }
+ if (UNLIKELY(arg == IRExprP__BBPTR)) {
+ addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
+ return 1;
+ }
+ /* Else it's a "normal" expression. */
IRType arg_ty = typeOfIRExpr(env->type_env, arg);
if (arg_ty == Ity_I32) {
addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
@@ -389,6 +402,12 @@
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
+ if (UNLIKELY(is_IRExprP__VECRET_or_BBPTR(e))) {
+ // These are always "safe" -- either a copy of %esp in some
+ // arbitrary vreg, or a copy of %ebp, respectively.
+ return False;
+ }
+ /* Else it's a "normal" expression. */
switch (e->tag) {
case Iex_RdTmp: case Iex_Const: case Iex_Get:
return False;
@@ -398,15 +417,19 @@
}
-/* Do a complete function call. guard is a Ity_Bit expression
+/* Do a complete function call. |guard| is an Ity_Bit expression
indicating whether or not the call happens. If guard==NULL, the
- call is unconditional. */
+ call is unconditional. |retloc| is set to indicate where the
+ return value is after the call. The caller (of this fn) must
+ generate code to add |stackAdjustAfterCall| to the stack pointer
+ after the call is done. */
static
-void doHelperCall ( ISelEnv* env,
- Bool passBBP,
- IRExpr* guard, IRCallee* cee, IRExpr** args,
- RetLoc rloc )
+void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
+ /*OUT*/RetLoc* retloc,
+ ISelEnv* env,
+ IRExpr* guard,
+ IRCallee* cee, IRType retTy, IRExpr** args )
{
X86CondCode cc;
HReg argregs[3];
@@ -415,12 +438,29 @@
Int not_done_yet, n_args, n_arg_ws, stack_limit,
i, argreg, argregX;
+ /* Set default returns. We'll update them later if needed. */
+ *stackAdjustAfterCall = 0;
+ *retloc = mk_RetLoc_INVALID();
+
+ /* These are used for cross-checking that IR-level constraints on
+ the use of IRExprP__VECRET and IRExprP__BBPTR are observed. */
+ UInt nVECRETs = 0;
+ UInt nBBPTRs = 0;
+
/* Marshal args for a call, do the call, and clear the stack.
Complexities to consider:
- * if passBBP is True, %ebp (the baseblock pointer) is to be
- passed as the first arg.
+ * The return type can be I{64,32,16,8} or V128. In the V128
+ case, it is expected that |args| will contain the special
+ value IRExprP__VECRET, in which case this routine generates
+ code to allocate space on the stack for the vector return
+ value. Since we are not passing any scalars on the stack, it
+ is enough to preallocate the return space before marshalling
+ any arguments, in this case.
+ |args| may also contain IRExprP__BBPTR, in which case the
+ value in %ebp is passed as the corresponding argument.
+
* If the callee claims regparmness of 1, 2 or 3, we must pass the
first 1, 2 or 3 args in registers (EAX, EDX, and ECX
respectively). To keep things relatively simple, only args of
@@ -463,21 +503,45 @@
*/
vassert(cee->regparms >= 0 && cee->regparms <= 3);
+ /* Count the number of args and also the VECRETs */
n_args = n_arg_ws = 0;
- while (args[n_args]) n_args++;
+ while (args[n_args]) {
+ IRExpr* arg = args[n_args];
+ n_args++;
+ if (UNLIKELY(arg == IRExprP__VECRET)) {
+ nVECRETs++;
+ } else if (UNLIKELY(arg == IRExprP__BBPTR)) {
+ nBBPTRs++;
+ }
+ }
+ /* If this fails, the IR is ill-formed */
+ vassert(nBBPTRs == 0 || nBBPTRs == 1);
+
+ /* If we have a VECRET, allocate space on the stack for the return
+ value, and record the stack pointer after that. */
+ HReg r_vecRetAddr = INVALID_HREG;
+ if (nVECRETs == 1) {
+ vassert(retTy == Ity_V128 || retTy == Ity_V256);
+ vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
+ r_vecRetAddr = newVRegI(env);
+ sub_from_esp(env, 16);
+ addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
+ } else {
+ // If either of these fail, the IR is ill-formed
+ vassert(retTy != Ity_V128 && retTy != Ity_V256);
+ vassert(nVECRETs == 0);
+ }
+
not_done_yet = n_args;
- if (passBBP)
- not_done_yet++;
stack_limit = cee->regparms;
- if (cee->regparms > 0 && passBBP) stack_limit--;
/* ------ BEGIN marshall all arguments ------ */
/* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
for (i = n_args-1; i >= stack_limit; i--) {
- n_arg_ws += pushArg(env, args[i]);
+ n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
not_done_yet--;
}
@@ -518,10 +582,18 @@
vex_printf("\n");
}
+ IRExpr* arg = args[i];
argreg--;
vassert(argreg >= 0);
- vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
- tmpregs[argreg] = iselIntExpr_R(env, args[i]);
+ if (UNLIKELY(arg == IRExprP__VECRET)) {
+ vassert(0); //ATC
+ }
+ else if (UNLIKELY(arg == IRExprP__BBPTR)) {
+ vassert(0); //ATC
+ } else {
+ vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
+ tmpregs[argreg] = iselIntExpr_R(env, arg);
+ }
not_done_yet--;
}
for (i = stack_limit-1; i >= 0; i--) {
@@ -534,35 +606,30 @@
/* It's safe to compute all regparm args directly into their
target registers. */
for (i = stack_limit-1; i >= 0; i--) {
+ IRExpr* arg = args[i];
argreg--;
vassert(argreg >= 0);
- vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
- addInstr(env, X86Instr_Alu32R(Xalu_MOV,
- iselIntExpr_RMI(env, args[i]),
- argregs[argreg]));
+ if (UNLIKELY(arg == IRExprP__VECRET)) {
+ vassert(!hregIsInvalid(r_vecRetAddr));
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV,
+ X86RMI_Reg(r_vecRetAddr),
+ argregs[argreg]));
+ }
+ else if (UNLIKELY(arg == IRExprP__BBPTR)) {
+ vassert(0); //ATC
+ } else {
+ vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV,
+ iselIntExpr_RMI(env, arg),
+ argregs[argreg]));
+ }
not_done_yet--;
}
}
- /* Not forgetting %ebp if needed. */
- if (passBBP) {
- vassert(argreg == 1);
- addInstr(env, mk_iMOVsd_RR( hregX86_EBP(), argregs[0]));
- not_done_yet--;
- }
-
/* ------ END deal with regparms ------ */
- } else {
-
- /* No regparms. Heave %ebp on the stack if needed. */
- if (passBBP) {
- addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
- n_arg_ws++;
- not_done_yet--;
- }
-
}
vassert(not_done_yet == 0);
@@ -584,8 +651,39 @@
}
}
- /* call the helper, and get the args off the stack afterwards. */
- callHelperAndClearArgs( env, cc, cee, n_arg_ws, rloc );
+ /* Do final checks, set the return values, and generate the call
+ instruction proper. */
+ vassert(*stackAdjustAfterCall == 0);
+ vassert(is_RetLoc_INVALID(*retloc));
+ switch (retTy) {
+ case Ity_INVALID:
+ /* Function doesn't return a value. */
+ *retloc = mk_RetLoc_simple(RLPri_None);
+ break;
+ case Ity_I64:
+ *retloc = mk_RetLoc_simple(RLPri_2Int);
+ break;
+ case Ity_I32: case Ity_I16: case Ity_I8:
+ *retloc = mk_RetLoc_simple(RLPri_Int);
+ break;
+ case Ity_V128:
+ *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
+ *stackAdjustAfterCall = 16;
+ break;
+ case Ity_V256:
+ vassert(0); // ATC
+ *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
+ *stackAdjustAfterCall = 32;
+ break;
+ default:
+ /* IR can denote other possible return types, but we don't
+ handle those here. */
+ vassert(0);
+ }
+
+ /* Finally, generate the call itself. This needs the *retloc value
+ set in the switch above, which is why it's at the end. */
+ callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
}
@@ -1307,7 +1405,7 @@
addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn,
- 0, RetLocInt ));
+ 0, mk_RetLoc_simple(RLPri_Int) ));
add_to_esp(env, 2*4);
addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
return dst;
@@ -1371,8 +1469,13 @@
goto irreducible;
/* Marshal args, do the call, clear stack. */
- doHelperCall( env, False, NULL, e->Iex.CCall.cee,
- e->Iex.CCall.args, RetLocInt );
+ UInt addToSp = 0;
+ RetLoc rloc = mk_RetLoc_INVALID();
+ doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
+ e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
+ vassert(is_sane_RetLoc(rloc));
+ vassert(rloc.pri == RLPri_Int);
+ vassert(addToSp == 0);
addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
return dst;
@@ -1890,8 +1993,15 @@
vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
vassert(con->Iex.Const.con->tag == Ico_U32);
/* Marshal args, do the call. */
- doHelperCall( env, False, NULL, cal->Iex.CCall.cee,
- cal->Iex.CCall.args, RetLocInt );
+ UInt addToSp = 0;
+ RetLoc rloc = mk_RetLoc_INVALID();
+ doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
+ cal->Iex.CCall.cee,
+ cal->Iex.CCall.retty, cal->Iex.CCall.args );
+ vassert(is_sane_RetLoc(rloc));
+ vassert(rloc.pri == RLPri_Int);
+ vassert(addToSp == 0);
+ /* */
addInstr(env, X86Instr_Alu32R(Xalu_CMP,
X86RMI_Imm(con->Iex.Const.con->Ico.U32),
hregX86_EAX()));
@@ -2432,7 +2542,7 @@
addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn,
- 0, RetLoc2Int ));
+ 0, mk_RetLoc_simple(RLPri_2Int) ));
add_to_esp(env, 4*4);
addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
@@ -2472,7 +2582,7 @@
addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn,
- 0, RetLoc2Int ));
+ 0, mk_RetLoc_simple(RLPri_2Int) ));
add_to_esp(env, 3*4);
addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
@@ -2711,7 +2821,7 @@
addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn,
- 0, RetLoc2Int ));
+ 0, mk_RetLoc_simple(RLPri_2Int) ));
add_to_esp(env, 2*4);
addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
@@ -2732,8 +2842,15 @@
HReg tHi = newVRegI(env);
/* Marshal args, do the call, clear stack. */
- doHelperCall( env, False, NULL, e->Iex.CCall.cee,
- e->Iex.CCall.args, RetLoc2Int );
+ UInt addToSp = 0;
+ RetLoc rloc = mk_RetLoc_INVALID();
+ doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
+ e->Iex.CCall.cee,
+ e->Iex.CCall.retty, e->Iex.CCall.args );
+ vassert(is_sane_RetLoc(rloc));
+ vassert(rloc.pri == RLPri_2Int);
+ vassert(addToSp == 0);
+ /* */
addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
@@ -3657,7 +3774,7 @@
X86AMode_IR(0, hregX86_ECX())));
/* call the helper */
addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
- 3, RetLocNone ));
+ 3, mk_RetLoc_simple(RLPri_None) ));
/* fetch the result from memory, using %r_argp, which the
register allocator will keep alive across the call. */
addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
@@ -3925,61 +4042,78 @@
/* --------- Call to DIRTY helper --------- */
case Ist_Dirty: {
IRDirty* d = stmt->Ist.Dirty.details;
- Bool passBBP = False;
- if (d->nFxState == 0)
- vassert(!d->needsBBP);
-
- passBBP = toBool(d->nFxState > 0 && d->needsBBP);
-
/* Figure out the return type, if any. */
IRType retty = Ity_INVALID;
if (d->tmp != IRTemp_INVALID)
retty = typeOfIRTemp(env->type_env, d->tmp);
- /* Marshal args, do the call, clear stack, set the return value
- to 0x555..555 if this is a conditional call that returns a
- value and the call is skipped. We need to set the ret-loc
- correctly in order to implement the IRDirty semantics that
- the return value is 0x555..555 if the call doesn't happen. */
- RetLoc rloc = RetLocINVALID;
+ Bool retty_ok = False;
switch (retty) {
case Ity_INVALID: /* function doesn't return anything */
- rloc = RetLocNone; break;
- case Ity_I64:
- rloc = RetLoc2Int; break;
- case Ity_I32: case Ity_I16: case Ity_I8:
- rloc = RetLocInt; break;
+ case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
+ case Ity_V128:
+ retty_ok = True; break;
default:
break;
}
- if (rloc == RetLocINVALID)
+ if (!retty_ok)
break; /* will go to stmt_fail: */
- /* Marshal args, do the call, clear stack. */
- doHelperCall( env, passBBP, d->guard, d->cee, d->args, rloc );
+ /* Marshal args, do the call, and set the return value to
+ 0x555..555 if this is a conditional call that returns a value
+ and the call is skipped. */
+ UInt addToSp = 0;
+ RetLoc rloc = mk_RetLoc_INVALID();
+ doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
+ vassert(is_sane_RetLoc(rloc));
/* Now figure out what to do with the returned value, if any. */
- if (d->tmp == IRTemp_INVALID)
- /* No return value. Nothing to do. */
- return;
-
- if (retty == Ity_I64) {
- HReg dstHi, dstLo;
- /* The returned value is in %edx:%eax. Park it in the
- register-pair associated with tmp. */
- lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
- addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
- addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
- return;
+ switch (retty) {
+ case Ity_INVALID: {
+ /* No return value. Nothing to do. */
+ vassert(d->tmp == IRTemp_INVALID);
+ vassert(rloc.pri == RLPri_None);
+ vassert(addToSp == 0);
+ return;
+ }
+ case Ity_I32: case Ity_I16: case Ity_I8: {
+ /* The returned value is in %eax. Park it in the register
+ associated with tmp. */
+ vassert(rloc.pri == RLPri_Int);
+ vassert(addToSp == 0);
+ HReg dst = lookupIRTemp(env, d->tmp);
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
+ return;
+ }
+ case Ity_I64: {
+ /* The returned value is in %edx:%eax. Park it in the
+ register-pair associated with tmp. */
+ vassert(rloc.pri == RLPri_2Int);
+ vassert(addToSp == 0);
+ HReg dstHi, dstLo;
+ lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
+ addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
+ return;
+ }
+ case Ity_V128: {
+ /* The returned value is on the stack, and *retloc tells
+ us where. Fish it off the stack and then move the
+ stack pointer upwards to clear it, as directed by
+ doHelperCall. */
+ vassert(rloc.pri == RLPri_V128SpRel);
+ vassert(addToSp >= 16);
+ HReg dst = lookupIRTemp(env, d->tmp);
+ X86AMode* am = X86AMode_IR(rloc.spOff, hregX86_ESP());
+ addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
+ add_to_esp(env, addToSp);
+ return;
+ }
+ default:
+ /*NOTREACHED*/
+ vassert(0);
}
- if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
- /* The returned value is in %eax. Park it in the register
- associated with tmp. */
- HReg dst = lookupIRTemp(env, d->tmp);
- addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
- return;
- }
break;
}
Modified: trunk/priv/host_mips_isel.c (+221 -108)
===================================================================
--- trunk/priv/host_mips_isel.c 2013-08-07 10:45:08 +01:00 (rev 2738)
+++ trunk/priv/host_mips_isel.c 2013-08-08 11:28:59 +01:00 (rev 2739)
@@ -379,12 +379,18 @@
return fr_dst;
}
-/* Do a complete function call. guard is a Ity_Bit expression
+/* Do a complete function call. |guard| is an Ity_Bit expression
indicating whether or not the call happens. If guard==NULL, the
- call is unconditional. */
+ call is unconditional. |retloc| is set to indicate where the
+ return value is after the call. The caller (of this fn) must
+ generate code to add |stackAdjustAfterCall| to the stack pointer
+ after the call is done. */
-static void doHelperCall(ISelEnv * env, Bool passBBP, IRExpr * guard,
- IRCallee * cee, IRExpr ** args, RetLoc rloc)
+static void doHelperCall(/*OUT*/UInt* stackAdjustAfterCall,
+ /*OUT*/RetLoc* retloc,
+ ISelEnv* env,
+ IRExpr* guard,
+ IRCallee* cee, IRType retTy, IRExpr** args )
{
MIPSCondCode cc;
HReg argregs[MIPS_N_REGPARMS];
@@ -392,9 +398,17 @@
Bool go_fast;
Int n_args, i, argreg;
UInt argiregs;
- ULong target;
HReg src = INVALID_HREG;
+ /* Set default returns. We'll update them later if needed. */
+ *stackAdjustAfterCall = 0;
+ *retloc = mk_RetLoc_INVALID();
+
+ /* These are used for cross-checking that IR-level constraints on
+ the use of IRExprP__VECRET and IRExprP__BBPTR are observed. */
+ UInt nVECRETs = 0;
+ UInt nBBPTRs = 0;
+
/* MIPS O32 calling convention: up to four registers ($a0 ... $a3)
are allowed to be used for passing integer arguments. They correspond
to regs GPR4 ... GPR7. Note that the cee->regparms field is meaningless
@@ -406,11 +420,31 @@
to regs GPR4 ... GPR11. Note that the cee->regparms field is meaningless
on MIPS host (since we only implement one calling convention) and so we
always ignore it. */
+
+ /* The return type can be I{64,32,16,8} or V{128,256}. In the
+ latter two cases, it is expected that |args| will contain the
+ special value IRExprP__VECRET, in which case this routine
+ generates code to allocate space on the stack for the vector
+ return value. Since we are not passing any scalars on the
+ stack, it is enough to preallocate the return space before
+ marshalling any arguments, in this case.
+
+ |args| may also contain IRExprP__BBPTR, in which case the value
+ in the guest state pointer register is passed as the
+ corresponding argument. */
+
n_args = 0;
- for (i = 0; args[i]; i++)
+ for (i = 0; args[i]; i++) {
+ IRExpr* arg = args[i];
+ if (UNLIKELY(arg == IRExprP__VECRET)) {
+ nVECRETs++;
+ } else if (UNLIKELY(arg == IRExprP__BBPTR)) {
+ nBBPTRs++;
+ }
n_args++;
+ }
- if (MIPS_N_REGPARMS < n_args + (passBBP ? 1 : 0)) {
+ if (n_args > MIPS_N_REGPARMS) {
vpanic("doHelperCall(MIPS): cannot currently handle > 4 or 8 args");
}
if (mode64) {
@@ -423,6 +457,9 @@
argregs[6] = hregMIPS_GPR10(mode64);
argregs[7] = hregMIPS_GPR11(mode64);
argiregs = 0;
+ tmpregs[0] = tmpregs[1] = tmpregs[2] =
+ tmpregs[3] = tmpregs[4] = tmpregs[5] =
+ tmpregs[6] = tmpregs[7] = INVALID_HREG;
} else {
argregs[0] = hregMIPS_GPR4(mode64);
argregs[1] = hregMIPS_GPR5(mode64);
@@ -429,16 +466,21 @@
argregs[2] = hregMIPS_GPR6(mode64);
argregs[3] = hregMIPS_GPR7(mode64);
argiregs = 0;
+ tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
}
- tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
-
/* First decide which scheme (slow or fast) is to be used. First assume the
fast scheme, and select slow if any contraindications (wow) appear. */
go_fast = True;
- if (guard) {
+ /* We'll need space on the stack for the return value. Avoid
+ possible complications with nested calls by using the slow
+ scheme. */
+ if (retTy == Ity_V128 || retTy == Ity_V256)
+ go_fast = False;
+
+ if (go_fast && guard) {
if (guard->tag == Iex_Const && guard->Iex.Const.con->tag == Ico_U1
&& guard->Iex.Const.con->Ico.U1 == True) {
/* unconditional */
@@ -462,34 +504,41 @@
if (go_fast) {
/* FAST SCHEME */
argreg = 0;
- if (passBBP) {
- argiregs |= (1 << (argreg + 4));
- addInstr(env, mk_iMOVds_RR(argregs[argreg],
- GuestStatePointer(mode64)));
- argreg++;
- }
for (i = 0; i < n_args; i++) {
+ IRExpr* arg = args[i];
vassert(argreg < MIPS_N_REGPARMS);
- vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32
- || typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
- if (typeOfIRExpr(env->type_env, args[i]) == Ity_I32 || mode64) {
+
+ IRType aTy = Ity_INVALID;
+ if (LIKELY(!is_IRExprP__VECRET_or_BBPTR(arg)))
+ aTy = typeOfIRExpr(env->type_env, arg);
+
+ if (aTy == Ity_I32 || mode64) {
argiregs |= (1 << (argreg + 4));
- addInstr(env, mk_iMOVds_RR(argregs[argreg], iselWordExpr_R(env,
- args[i])));
- } else { /* Ity_I64 */
+ addInstr(env, mk_iMOVds_RR(argregs[argreg],
+ iselWordExpr_R(env, arg)));
+ argreg++;
+ } else if (aTy == Ity_I64) { /* Ity_I64 */
if (argreg & 1) {
argreg++;
argiregs |= (1 << (argreg + 4));
}
HReg rHi, rLo;
- iselInt64Expr(&rHi, &rLo, env, args[i]);
+ iselInt64Expr(&rHi, &rLo, env, arg);
argiregs |= (1 << (argreg + 4));
addInstr(env, mk_iMOVds_RR( argregs[argreg++], rHi ));
argiregs |= (1 << (argreg + 4));
addInstr(env, mk_iMOVds_RR( argregs[argreg], rLo));
+ argreg++;
+ } else if (arg == IRExprP__BBPTR) {
+ vassert(0); // ATC
+ addInstr(env, mk_iMOVds_RR(argregs[argreg],
+ GuestStatePointer(mode64)));
+ argreg++;
+ } else if (arg == IRExprP__VECRET) {
+ // If this happens, it denotes ill-formed IR.
+ vassert(0);
}
- argreg++;
}
/* Fast scheme only applies for unconditional calls. Hence: */
cc = MIPScc_AL;
@@ -496,32 +545,38 @@
} else {
/* SLOW SCHEME; move via temporaries */
argreg = 0;
- if (passBBP) {
- /* This is pretty stupid; better to move directly to r3
- after the rest of the args are done. */
- tmpregs[argreg] = newVRegI(env);
- addInstr(env, mk_iMOVds_RR(tmpregs[argreg],
- GuestStatePointer(mode64)));
- argreg++;
- }
+
for (i = 0; i < n_args; i++) {
vassert(argreg < MIPS_N_REGPARMS);
- vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32
- || typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
- if (typeOfIRExpr(env->type_env, args[i]) == Ity_I32 || mode64) {
- tmpregs[argreg] = iselWordExpr_R(env, args[i]);
- } else { /* Ity_I64 */
+ IRExpr* arg = args[i];
+
+ IRType aTy = Ity_INVALID;
+ if (LIKELY(!is_IRExprP__VECRET_or_BBPTR(arg)))
+ aTy = typeOfIRExpr(env->type_env, arg);
+
+ if (aTy == Ity_I32 || mode64) {
+ tmpregs[argreg] = iselWordExpr_R(env, arg);
+ argreg++;
+ } else if (aTy == Ity_I64) { /* Ity_I64 */
if (argreg & 1)
argreg++;
if (argreg + 1 >= MIPS_N_REGPARMS)
vassert(0); /* out of argregs */
HReg raHi, raLo;
- iselInt64Expr(&raHi, &raLo, env, args[i]);
+ iselInt64Expr(&raHi, &raLo, env, arg);
tmpregs[argreg] = raLo;
argreg++;
tmpregs[argreg] = raHi;
+ argreg++;
+ } else if (arg == IRExprP__BBPTR) {
+ vassert(0); // ATC
+ tmpregs[argreg] = GuestStatePointer(mode64);
+ argreg++;
}
- argreg++;
+ else if (arg == IRExprP__VECRET) {
+ // If this happens, it denotes ill-formed IR
+ vassert(0);
+ }
}
/* Now we can compute the condition. We can't do it earlier
@@ -549,14 +604,49 @@
}
}
- target = mode64 ? Ptr_to_ULong(cee->addr) :
- toUInt(Ptr_to_ULong(cee->addr));
+ /* Do final checks, set the return values, and generate the call
+ instruction proper. */
+ vassert(nBBPTRs == 0 || nBBPTRs == 1);
+ vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
+ vassert(*stackAdjustAfterCall == 0);
+ vassert(is_RetLoc_INVALID(*retloc));
+ switch (retTy) {
+ case Ity_INVALID:
+ /* Function doesn't return a value. */
+ *retloc = mk_RetLoc_simple(RLPri_None);
+ break;
+ case Ity_I64:
+ *retloc = mk_RetLoc_simple(mode64 ? RLPri_Int : RLPri_2Int);
+ break;
+ case Ity_I32: case Ity_I16: case Ity_I8:
+ *retloc = mk_RetLoc_simple(RLPri_Int);
+ break;
+ case Ity_V128:
+ vassert(0); // ATC
+ *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
+ *stackAdjustAfterCall = 16;
+ break;
+ case Ity_V256:
+ vassert(0); // ATC
+ *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
+ *stackAdjustAfterCall = 32;
+ break;
+ default:
+ /* IR can denote other possible return types, but we don't
+ handle those here. */
+ vassert(0);
+ }
- /* Finally, the call itself. */
+ ULong target = mode64 ? Ptr_to_ULong(cee->addr) :
+ toUInt(Ptr_to_ULong(cee->addr));
+
+ /* Finally, generate the call itself. This needs the *retloc value
+ set in the switch above, which is why it's at the end. */
if (cc == MIPScc_AL)
- addInstr(env, MIPSInstr_CallAlways(cc, (Addr64)target, argiregs, rloc));
+ addInstr(env, MIPSInstr_CallAlways(cc, (Addr64)target, argiregs,
+ *retloc));
else
- addInstr(env, MIPSInstr_Call(cc, (Addr64)target, argiregs, src, rloc));
+ addInstr(env, MIPSInstr_Call(cc, (Addr64)target, argiregs, src, *retloc));
}
/*---------------------------------------------------------*/
@@ -1244,12 +1334,13 @@
}
/* What's the retloc? */
- RetLoc rloc = RetLocINVALID;
+ RetLoc rloc = mk_RetLoc_INVALID();
if (ty == Ity_I32) {
- rloc = RetLocInt;
+ rloc = mk_RetLoc_simple(RLPri_Int);
}
else if (ty == Ity_I64) {
- rloc = mode64 ? RetLocInt : RetLoc2Int;
+ rloc = mode64 ? mk_RetLoc_simple(RLPri_Int) :
+ mk_RetLoc_simple(RLPri_2Int);
}
else {
goto irreducible;
@@ -1681,12 +1772,13 @@
break;
}
- RetLoc rloc = RetLocINVALID;
+ RetLoc rloc = mk_RetLoc_INVALID();
if (ty == Ity_I32) {
- rloc = RetLocInt;
+ rloc = mk_RetLoc_simple(RLPri_Int);
}
else if (ty == Ity_I64) {
- rloc = mode64 ? RetLocInt : RetLoc2Int;
+ rloc = mode64 ? mk_RetLoc_simple(RLPri_Int) :
+ mk_RetLoc_simple(RLPri_2Int);
}
else {
goto irreducible;
@@ -1796,23 +1888,18 @@
/* be very restrictive for now. Only 32/64-bit ints allowed for
args, and 32 bits for return type. Don't forget to change
the RetLoc if more return types are allowed in future. */
- if (e->Iex.CCall.retty != Ity_I32 && !mode64)
+ if (e->Iex.CCall.retty != Ity_I32)
goto irreducible;
- /* What's the retloc? */
- RetLoc rloc = RetLocINVALID;
- if (ty == Ity_I32) {
- rloc = RetLocInt;
- }
- else if (ty == Ity_I64) {
- rloc = mode64 ? RetLocInt : RetLoc2Int;
- }
- else {
- goto irreducible;
- }
+ /* Marshal args, do the call, clear stack. */
+ UInt addToSp = 0;
+ RetLoc rloc = mk_RetLoc_INVALID();
+ doHelperCall(&addToSp, &rloc, env, NULL/*guard*/, e->Iex.CCall.cee,
+ e->Iex.CCall.retty, e->Iex.CCall.args );
- /* Marshal args, do the call, clear stack. */
- doHelperCall(env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args, rloc);
+ vassert(is_sane_RetLoc(rloc));
+ vassert(rloc.pri == RLPri_Int);
+ vassert(addToSp == 0);
addInstr(env, mk_iMOVds_RR(r_dst, hregMIPS_GPR2(mode64)));
return r_dst;
}
@@ -3704,66 +3791,92 @@
/* --------- Call to DIRTY helper --------- */
case Ist_Dirty: {
IRDirty *d = stmt->Ist.Dirty.details;
- Bool passBBP = False;
- if (d->nFxState == 0)
- vassert(!d->needsBBP);
-
- passBBP = toBool(d->nFxState > 0 && d->needsBBP);
-
/* Figure out the return type, if any. */
IRType retty = Ity_INVALID;
if (d->tmp != IRTemp_INVALID)
retty = typeOfIRTemp(env->type_env, d->tmp);
- /* Marshal args, do the call, clear stack, set the return
- value to 0x555..555 if this is a conditional call that
- returns a value and the call is skipped. We need to set
- the ret-loc correctly in order to implement the IRDirty
- semantics that the return value is 0x555..555 if the call
- doesn't happen. */
- RetLoc rloc = RetLocINVALID;
+ /* Throw out any return types we don't know about. */
+ Bool retty_ok = False;
switch (retty) {
- case Ity_INVALID: /* function doesn't return anything */
- rloc = RetLocNone; break;
- case Ity_I64:
- rloc = mode64 ? RetLocInt : RetLoc2Int; break;
- case Ity_I32: case Ity_I16: case Ity_I8:
- rloc = RetLocInt; break;
+ case Ity_INVALID: /* Function doesn't return anything. */
+ case Ity_V128:
+ case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
+ retty_ok = True; break;
default:
break;
}
- if (rloc == RetLocINVALID)
+
+ if (!retty_ok)
break; /* will go to stmt_fail: */
- /* Marshal args, do the call, clear stack. */
- doHelperCall(env, passBBP, d->guard, d->cee, d->args, rloc);
+ /* Marshal args, do the call, clear stack, set the return value
+ to 0x555..555 if this is a conditional call that returns a
+ value and the call is skipped. */
+ UInt addToSp = 0;
+ RetLoc rloc = mk_RetLoc_INVALID();
+ doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
+ vassert(is_sane_RetLoc(rloc));
/* Now figure out what to do with the returned value, if any. */
- if (d->tmp == IRTemp_INVALID)
- /* No return value. Nothing to do. */
- return;
+ switch (retty) {
+ case Ity_INVALID: {
+ /* No return value. Nothing to do. */
+ vassert(d->tmp == IRTemp_INVALID);
+ vassert(rloc.pri == RLPri_None);
+ vassert(addToSp == 0);
+ return;
+ }
+ case Ity_I32: case Ity_I16: case Ity_I8: {
+ /* The returned value is in $v0. Park it in the register
+ associated with tmp. */
+ HReg r_dst = lookupIRTemp(env, d->tmp);
+ addInstr(env, mk_iMOVds_RR(r_dst, hregMIPS_GPR2(mode64)));
+ vassert(rloc.pri == RLPri_Int);
+ vassert(addToSp == 0);
+ return;
+ }
+ case Ity_I64: {
+ if (mode64) {
+ /* The returned value is in $v0. Park it in the register
+ associated with tmp. */
+ HReg r_dst = lookupIRTemp(env, d->tmp);
+ addInstr(env, mk_iMOVds_RR(r_dst, hregMIPS_GPR2(mode64)));
+ vassert(rloc.pri == RLPri_Int);
+ vassert(addToSp == 0);
+ return;
+ } else {
+ HReg rHi = newVRegI(env);
+ HReg rLo = newVRegI(env);
+ HReg dstHi, dstLo;
+ addInstr(env, mk_iMOVds_RR(rLo, hregMIPS_GPR2(mode64)));
+ addInstr(env, mk_iMOVds_RR(rHi, hregMIPS_GPR3(mode64)));
+ lookupIRTemp64(&dstHi, &dstLo, env, d->tmp);
+ addInstr(env, mk_iMOVds_RR(dstHi, rHi));
+ addInstr(env, mk_iMOVds_RR(dstLo, rLo));
+ return;
+ }
+ }
+ case Ity_V128: {
+ /* ATC. The code that this produces really
+ needs to be looked at, to verify correctness.
+ I don't think this can ever happen though, since the
+ MIPS front end never produces 128-bit loads/stores. */
+ vassert(0);
+ vassert(rloc.pri == RLPri_V128SpRel);
+ vassert(addToSp >= 16);
+ HReg dst = lookupIRTemp(env, d->tmp);
+ MIPSAMode* am = MIPSAMode_IR(rloc.spOff, StackPointer(mode64));
+ addInstr(env, MIPSInstr_Load(mode64 ? 8 : 4, dst, am, mode64));
+ add_to_sp(env, addToSp);
+ return;
- if (retty == Ity_I64 && !mode64) {
- HReg rHi = newVRegI(env);
- HReg rLo = newVRegI(env);
- HReg dstHi, dstLo;
- addInstr(env, mk_iMOVds_RR(rLo, hregMIPS_GPR2(mode64)));
- addInstr(env, mk_iMOVds_RR(rHi, hregMIPS_GPR3(mode64)));
- lookupIRTemp64(&dstHi, &dstLo, env, d->tmp);
- addInstr(env, mk_iMOVds_RR(dstHi, rHi));
- addInstr(env, mk_iMOVds_RR(dstLo, rLo));
- return;
+ }
+ default:
+ /*NOTREACHED*/
+ vassert(0);
}
- if (retty == Ity_I8 || retty == Ity_I16 || retty == Ity_I32
- || (retty == Ity_I64 && mode64)) {
- /* The returned value is in %r2. Park it in the register
- associated with tmp. */
- HReg r_dst = lookupIRTemp(env, d->tmp);
- addInstr(env, mk_iMOVds_RR(r_dst, hregMIPS_GPR2(mode64)));
- return;
- }
- break;
}
/* --------- Load Linked or Store Conditional --------- */
Modified: trunk/priv/host_amd64_isel.c (+219 -93)
===================================================================
--- trunk/priv/host_amd64_isel.c 2013-08-07 10:45:08 +01:00 (rev 2738)
+++ trunk/priv/host_amd64_isel.c 2013-08-08 11:28:59 +01:00 (rev 2739)
@@ -366,6 +366,15 @@
HReg dst,
IRExpr* e )
{
+ /* Per comments in doHelperCall below, appearance of
+ IRExprP__VECRET implies ill-formed IR. */
+ vassert(e != IRExprP__VECRET);
+
+ /* In this case we give out a copy of the BaseBlock pointer. */
+ if (UNLIKELY(e == IRExprP__BBPTR)) {
+ return mk_iMOVsd_RR( hregAMD64_RBP(), dst );
+ }
+
vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);
if (e->tag == Iex_Const) {
@@ -409,27 +418,37 @@
}
-/* Do a complete function call. guard is a Ity_Bit expression
+/* Do a complete function call. |guard| is a Ity_Bit expression
indicating whether or not the call happens. If guard==NULL, the
- call is unconditional. */
+ call is unconditional. |retloc| is set to indicate where the
+ return value is after the call. The caller (of this fn) must
+ generate code to add |stackAdjustAfterCall| to the stack pointer
+ after the call is done. */
static
-void doHelperCall ( ISelEnv* env,
- Bool passBBP,
- IRExpr* guard, IRCallee* cee, IRExpr** args,
- RetLoc rloc )
+void doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
+ /*OUT*/RetLoc* retloc,
+ ISelEnv* env,
+ IRExpr* guard,
+ IRCallee* cee, IRType retTy, IRExpr** args )
{
AMD64CondCode cc;
HReg argregs[6];
HReg tmpregs[6];
AMD64Instr* fastinstrs[6];
- Int n_args, i, argreg;
+ UInt n_args, i;
+ /* Set default returns. We'll update them later if needed. */
+ *stackAdjustAfterCall = 0;
+ *retloc = mk_RetLoc_INVALID();
+
+ /* These are used for cross-checking that IR-level constraints on
+ the use of IRExprP__VECRET and IRExprP__BBPTR are observed. */
+ UInt nVECRETs = 0;
+ UInt nBBPTRs = 0;
+
/* Marshal args for a call and do the call.
- If passBBP is True, %rbp (the baseblock pointer) is to be passed
- as the first arg.
-
This function only deals with a tiny set of possibilities, which
cover all helpers in practice. The restrictions are that only
arguments in registers are supported, hence only 6x64 integer
@@ -436,6 +455,17 @@
bits in total can be passed. In fact the only supported arg
type is I64.
+ The return type can be I{64,32,16,8} or V{128,256}. In the
+ latter two cases, it is expected that |args| will contain the
+ special value IRExprP__VECRET, in which case this routine
+ generates code to allocate space on the stack for the vector
+ return value. Since we are not passing any scalars on the
+ stack, it is enough to preallocate the return space before
+ marshalling any arguments, in this case.
+
+ |args| may also contain IRExprP__BBPTR, in which case the
+ value in %rbp is passed as the corresponding argument.
+
Generating code which is both efficient and correct when
parameters are to be passed in registers is difficult, for the
reasons elaborated in detail in comments attached to
@@ -461,7 +491,10 @@
fast scheme, else use the slow scheme. Note also that only
unconditional calls may use the fast scheme, since having to
compute a condition expression could itself trash real
- registers.
+ registers. Note that for simplicity, in the case where
+ IRExprP__VECRET is present, we use the slow scheme. This is
+ motivated by the desire to avoid any possible complexity
+ w.r.t. nested calls.
Note this requires being able to examine an expression and
determine whether or not evaluation of it might use a fixed
@@ -474,12 +507,11 @@
/* Note that the cee->regparms field is meaningless on AMD64 host
(since there is only one calling convention) and so we always
ignore it. */
-
n_args = 0;
for (i = 0; args[i]; i++)
n_args++;
- if (6 < n_args + (passBBP ? 1 : 0))
+ if (n_args > 6)
vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");
argregs[0] = hregAMD64_RDI();
@@ -499,6 +531,12 @@
assume the fast scheme, and select slow if any contraindications
(wow) appear. */
+ /* We'll need space on the stack for the return value. Avoid
+ possible complications with nested calls by using the slow
+ scheme. */
+ if (retTy == Ity_V128 || retTy == Ity_V256)
+ goto slowscheme;
+
if (guard) {
if (guard->tag == Iex_Const
&& guard->Iex.Const.con->tag == Ico_U1
@@ -517,26 +555,28 @@
in a buffer and emit that if we're successful. */
/* FAST SCHEME */
- argreg = 0;
- if (passBBP) {
- fastinstrs[argreg] = mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]);
- argreg++;
- }
-
+ /* In this loop, we process args that can be computed into the
+ destination (real) register with a single instruction, without
+ using any fixed regs. That also includes IRExprP__BBPTR, but
+ not IRExprP__VECRET. Indeed, if the IR is well-formed, we can
+ never see IRExprP__VECRET at this point, since the return-type
+ check above should ensure all those cases use the slow scheme
+ instead. */
+ vassert(n_args >= 0 && n_args <= 6);
for (i = 0; i < n_args; i++) {
- vassert(argreg < 6);
- vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
- fastinstrs[argreg]
- = iselIntExpr_single_instruction( env, argregs[argreg], args[i] );
- if (fastinstrs[argreg] == NULL)
+ IRExpr* arg = args[i];
+ if (LIKELY(!is_IRExprP__VECRET_or_BBPTR(arg))) {
+ vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
+ }
+ fastinstrs[i]
+ = iselIntExpr_single_instruction( env, argregs[i], args[i] );
+ if (fastinstrs[i] == NULL)
goto slowscheme;
- argreg++;
}
/* Looks like we're in luck. Emit the accumulated instructions and
move on to doing the call itself. */
- vassert(argreg <= 6);
- for (i = 0; i < argreg; i++)
+ for (i = 0; i < n_args; i++)
addInstr(env, fastinstrs[i]);
/* Fast scheme only applies for unconditional calls. Hence: */
@@ -547,26 +587,47 @@
/* SLOW SCHEME; move via temporaries */
slowscheme:
+ {}
# if 0 /* debug only */
if (n_args > 0) {for (i = 0; args[i]; i++) {
ppIRExpr(args[i]); vex_printf(" "); }
vex_printf("\n");}
# endif
- argreg = 0;
- if (passBBP) {
- /* This is pretty stupid; better to move directly to rdi
- after the rest of the args are done. */
- tmpregs[argreg] = newVRegI(env);
- addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
- argreg++;
+ /* If we have a vector return type, allocate a place for it on the
+ stack and record its address. */
+ HReg r_vecRetAddr = INVALID_HREG;
+ if (retTy == Ity_V128) {
+ r_vecRetAddr = newVRegI(env);
+ sub_from_rsp(env, 16);
+ addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
}
+ else if (retTy == Ity_V256) {
+ vassert(0); //ATC
+ r_vecRetAddr = newVRegI(env);
+ sub_from_rsp(env, 32);
+ addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
+ }
+ vassert(n_args >= 0 && n_args <= 6);
for (i = 0; i < n_args; i++) {
- vassert(argreg < 6);
- vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
- tmpregs[argreg] = iselIntExpr_R(env, args[i]);
- argreg++;
+ IRExpr* arg = args[i];
+ if (UNLIKELY(arg == IRExprP__BBPTR)) {
+ tmpregs[i] = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[i]));
+ nBBPTRs++;
+ }
+ else if (UNLIKELY(arg == IRExprP__VECRET)) {
+ /* We stashed the address of the return slot earlier, so just
+ retrieve it now. */
+ vassert(!hregIsInvalid(r_vecRetAddr));
+ tmpregs[i] = r_vecRetAddr;
+ nVECRETs++;
+ }
+ else {
+ vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
+ tmpregs[i] = iselIntExpr_R(env, args[i]);
+ }
}
/* Now we can compute the condition. We can't do it earlier
@@ -585,7 +646,7 @@
}
/* Move the args to their final destinations. */
- for (i = 0; i < argreg; i++) {
+ for (i = 0; i < n_args; i++) {
/* None of these insns, including any spill code that might
be generated, may alter the condition codes. */
addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
@@ -592,14 +653,47 @@
}
- /* Finally, the call itself. */
+ /* Do final checks, set the return values, and generate the call
+ instruction proper. */
handle_call:
- addInstr(env, AMD64Instr_Call(
- cc,
- Ptr_to_ULong(cee->addr),
- n_args + (passBBP ? 1 : 0), rloc
- )
- );
+
+ if (retTy == Ity_V128 || retTy == Ity_V256) {
+ vassert(nVECRETs == 1);
+ } else {
+ vassert(nVECRETs == 0);
+ }
+
+ vassert(nBBPTRs == 0 || nBBPTRs == 1);
+
+ vassert(*stackAdjustAfterCall == 0);
+ vassert(is_RetLoc_INVALID(*retloc));
+ switch (retTy) {
+ case Ity_INVALID:
+ /* Function doesn't return a value. */
+ *retloc = mk_RetLoc_simple(RLPri_None);
+ break;
+ case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
+ *retloc = mk_RetLoc_simple(RLPri_Int);
+ break;
+ case Ity_V128:
+ *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
+ *stackAdjustAfterCall = 16;
+ break;
+ case Ity_V256:
+ vassert(0); // ATC
+ *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
+ *stackAdjustAfterCall = 32;
+ break;
+ default:
+ /* IR can denote other possible return types, but we don't
+ handle those here. */
+ vassert(0);
+ }
+
+ /* Finally, generate the call itself. This needs the *retloc value
+ set in the switch above, which is why it's at the end. */
+ addInstr(env,
+ AMD64Instr_Call(cc, Ptr_to_ULong(cee->addr), n_args, *retloc));
}
@@ -1136,7 +1230,8 @@
addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
- addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2, RetLocInt ));
+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2,
+ mk_RetLoc_simple(RLPri_Int) ));
addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
return dst;
}
@@ -1606,7 +1701,7 @@
fn = (HWord)h_generic_calc_GetMSBs8x8;
addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
- 1, RetLocInt ));
+ 1, mk_RetLoc_simple(RLPri_Int) ));
/* MovxLQ is not exactly the right thing here. We just
need to get the bottom 8 bits of RAX into dst, and zero
out everything else. Assuming that the helper returns
@@ -1637,7 +1732,7 @@
AMD64RMI_Mem(m16_rsp),
hregAMD64_RSI() )); /* 2nd arg */
addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
- 2, RetLocInt ));
+ 2, mk_RetLoc_simple(RLPri_Int) ));
/* MovxLQ is not exactly the right thing here. We just
need to get the bottom 16 bits of RAX into dst, and zero
out everything else. Assuming that the helper returns
@@ -1671,7 +1766,8 @@
HReg dst = newVRegI(env);
HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
- addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1, RetLocInt ));
+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1,
+ mk_RetLoc_simple(RLPri_Int) ));
addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
return dst;
}
@@ -1726,14 +1822,18 @@
vassert(ty == e->Iex.CCall.retty);
/* be very restrictive for now. Only 64-bit ints allowed for
- args, and 64 or 32 bits for return type. Don't forget to
- change the RetLoc if more types are allowed in future. */
+ args, and 64 or 32 bits for return type. */
if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
goto irreducible;
/* Marshal args, do the call. */
- doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args,
- RetLocInt );
+ UInt addToSp = 0;
+ RetLoc rloc = mk_RetLoc_INVALID();
+ doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
+ e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
+ vassert(is_sane_RetLoc(rloc));
+ vassert(rloc.pri == RLPri_Int);
+ vassert(addToSp == 0);
/* Move to dst, and zero out the top 32 bits if the result type is
Ity_I32. Probably overkill, but still .. */
@@ -2278,8 +2378,15 @@
vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */
vassert(con->Iex.Const.con->tag == Ico_U64);
/* Marshal args, do the call. */
- doHelperCall( env, False, NULL, cal->Iex.CCall.cee, cal->Iex.CCall.args,
- RetLocInt );
+ UInt addToSp = 0;
+ RetLoc rloc = mk_RetLoc_INVALID();
+ doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
+ cal->Iex.CCall.cee,
+ cal->Iex.CCall.retty, cal->Iex.CCall.args );
+ vassert(is_sane_RetLoc(rloc));
+ vassert(rloc.pri == RLPri_Int);
+ vassert(addToSp == 0);
+ /* */
addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp));
addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,
AMD64RMI_Reg(hregAMD64_RAX()), tmp));
@@ -2590,7 +2697,7 @@
/* call the helper */
addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
(ULong)(HWord)h_generic_calc_MAddF32,
- 4, RetLocNone ));
+ 4, mk_RetLoc_simple(RLPri_None) ));
/* fetch the result from memory, using %r_argp, which the
register allocator will keep alive across the call. */
addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 4, dst,
@@ -2769,7 +2876,7 @@
/* call the helper */
addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
(ULong)(HWord)h_generic_calc_MAddF64,
- 4, RetLocNone ));
+ 4, mk_RetLoc_simple(RLPri_None) ));
/* fetch the result from memory, using %r_argp, which the
register allocator will keep alive across the call. */
addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 8, dst,
@@ -3491,7 +3598,7 @@
AMD64AMode_IR(0, hregAMD64_RDX())));
/* call the helper */
addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
- 3, RetLocNone ));
+ 3, mk_RetLoc_simple(RLPri_None) ));
/* fetch the result from memory, using %r_argp, which the
register allocator will keep alive across the call. */
addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
@@ -3540,7 +3647,7 @@
/* call the helper */
addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
- 3, RetLocNone ));
+ 3, mk_RetLoc_simple(RLPri_None) ));
/* fetch the result from memory, using %r_argp, which the
register allocator will keep alive across the call. */
addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
@@ -3944,7 +4051,8 @@
addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
AMD64AMode_IR(48, hregAMD64_RDX())));
/* call the helper */
- addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3, RetLocNone ));
+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
+ mk_RetLoc_simple(RLPri_None) ));
/* Prepare 3 arg regs:
leaq 48(%r_argp), %rdi
leaq 64(%r_argp), %rsi
@@ -3957,7 +4065,8 @@
addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(80, argp),
...
[truncated message content] |