From: Wuweijia <wuw...@hu...> - 2018-08-28 14:40:27
|
Hi, I wrote a program that runs on an arm64 server. Unlike my other programs, it uses fp16 to compute its result. The fp16 program runs successfully without valgrind, but when run under valgrind it fails. Here are the last lines of output and the call stack: t135 = GET:F16(722) vex: the `impossible' happened: iselStmt vex storage: T total 4464272856 bytes allocated vex storage: P total 0 bytes allocated valgrind: the 'impossible' happened: LibVEX called failure_exit(). host stacktrace: ==2201== at 0x380453C4: show_sched_status_wrk (m_libcassert.c:355) ==2201== by 0x3804572B: report_and_quit (m_libcassert.c:426) ==2201== by 0x380457CB: panic (m_libcassert.c:502) ==2201== by 0x3804576F: vgPlain_core_panic_at (m_libcassert.c:507) ==2201== by 0x380457DB: vgPlain_core_panic (m_libcassert.c:512) ==2201== by 0x38060C6B: failure_exit (m_translate.c:740) ==2201== by 0x38109AAB: vpanic (main_util.c:231) ==2201== by 0x3816C30F: iselStmt (host_arm64_isel.c:4003) ==2201== by 0x3816A7DB: iselSB_ARM64 (host_arm64_isel.c:4201) ==2201== by 0x38107D93: libvex_BackEnd (main_main.c:1047) ==2201== by 0x38107D93: LibVEX_Translate (main_main.c:1174) ==2201== by 0x38060A23: vgPlain_translate (m_translate.c:1794) ==2201== by 0x38093F47: handle_chain_me (scheduler.c:1084) ==2201== by 0x3809227F: vgPlain_scheduler (scheduler.c:0) ==2201== by 0x380A0607: thread_wrapper (syswrap-linux.c:103) ==2201== by 0x380A0607: run_a_thread_NORETURN (syswrap-linux.c:156) ==2201== by 0x380A051F: vgModuleLocal_start_thread_NORETURN (syswrap-linux.c:320) ==2201== by 0x380C2E37: ??? (in /system/lib64/valgrind/memcheck-arm64-linux) I checked the valgrind code that selects instructions for this statement. In the Ist_WrTmp case, there is no branch that handles the fp16 type. I would like to know how to fix this bug and add support for the fp16 type. 
The relevant code of the iselStmt function in host_arm64_isel.c is as follows: /* --------- TMP --------- */ /* assign value to temporary */ case Ist_WrTmp: { IRTemp tmp = stmt->Ist.WrTmp.tmp; IRType ty = typeOfIRTemp(env->type_env, tmp); if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { /* We could do a lot better here. But for the time being: */ HReg dst = lookupIRTemp(env, tmp); HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data); addInstr(env, ARM64Instr_MovI(dst, rD)); return; } if (ty == Ity_I1) { /* Here, we are generating a I1 value into a 64 bit register. Make sure the value in the register is only zero or one, but no other. This allows optimisation of the 1Uto64(tmp:I1) case, by making it simply a copy of the register holding 'tmp'. The point being that the value in the register holding 'tmp' can only have been created here. LATER: that seems dangerous; safer to do 'tmp & 1' in that case. Also, could do this just with a single CINC insn. */ /* CLONE-01 */ HReg zero = newVRegI(env); HReg one = newVRegI(env); HReg dst = lookupIRTemp(env, tmp); addInstr(env, ARM64Instr_Imm64(zero, 0)); addInstr(env, ARM64Instr_Imm64(one, 1)); ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data); addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); return; } if (ty == Ity_F64) { HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); HReg dst = lookupIRTemp(env, tmp); addInstr(env, ARM64Instr_VMov(8, dst, src)); return; } if (ty == Ity_F32) { HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); HReg dst = lookupIRTemp(env, tmp); addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src)); return; } if (ty == Ity_V128) { HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data); HReg dst = lookupIRTemp(env, tmp); addInstr(env, ARM64Instr_VMov(16, dst, src)); return; } if (ty == Ity_V256) { HReg srcHi, srcLo, dstHi, dstLo; iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data); lookupIRTempPair( &dstHi, &dstLo, env, tmp); addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi)); addInstr(env, 
ARM64Instr_VMov(16, dstLo, srcLo)); return; } break; } /* --------- Call to DIRTY helper --------- */ /* call complex ("dirty") helper function */ case Ist_Dirty: { Valgrind version: 3.13. BR Owen |