From: <ti...@co...> - 2009-07-31 22:24:45
|
Author: tismer Date: Sat Aug 1 00:27:42 2009 New Revision: 66720 Modified: psyco/v2/dist/Makefile psyco/v2/dist/c/codegen.c psyco/v2/dist/c/codegen.h psyco/v2/dist/c/i386/iencoding.c psyco/v2/dist/c/i386/iencoding.h psyco/v2/dist/c/i386/iprocessor.c psyco/v2/dist/c/i386/ipyencoding.c psyco/v2/dist/c/i386/ipyencoding.h psyco/v2/dist/c/psyco.h psyco/v2/dist/py-support/core.py psyco/v2/dist/test/fulltester.py Log: Solved two long-hiding bugs in Mac OS X stack alignment. The Py_DECREF deallocator calls were not aligned. Found this with Python 2.6's new unittest implementation which deallocated a semaphore, which crashed when accessing the MMX registers. Furthermore, the frame epilog code was not aligned. I did not hit that error (the code is only called when the frame caught an exception, and it would only crash in a case like the one above), but re-wrote the epilog to be aligned. (Only if the finalizer gets called, of course, so we don't lose speed). This is hopefully the last time that I have to go into this. Note: while desperately hunting these errors, I panicked and evicted all thread-related code. That is: psyco.profile() is gone. 
Modified: psyco/v2/dist/Makefile ============================================================================== --- psyco/v2/dist/Makefile (original) +++ psyco/v2/dist/Makefile Sat Aug 1 00:27:42 2009 @@ -3,6 +3,9 @@ build: python setup.py build +install: + python setup.py install + test: python -c 'import sys; print sys.executable' > test/fulltester.local cd test && python ./fulltester.py Modified: psyco/v2/dist/c/codegen.c ============================================================================== --- psyco/v2/dist/c/codegen.c (original) +++ psyco/v2/dist/c/codegen.c Sat Aug 1 00:27:42 2009 @@ -408,6 +408,7 @@ break; } } + CHECK_ALIGNMENT_CALLING(); CALL_C_FUNCTION (c_function, count); END_CODE Modified: psyco/v2/dist/c/codegen.h ============================================================================== --- psyco/v2/dist/c/codegen.h (original) +++ psyco/v2/dist/c/codegen.h Sat Aug 1 00:27:42 2009 @@ -115,7 +115,7 @@ representing the output values. */ vinfo_t *psyco_generic_call(PsycoObject *po, void* c_function, - int flags, const char* arguments, ...); + int flags, const char* arguments, ...); /* if the C function has no side effect it can be called at compile-time if all its arguments are compile-time. Use CfPure in this case. */ Modified: psyco/v2/dist/c/i386/iencoding.c ============================================================================== --- psyco/v2/dist/c/i386/iencoding.c (original) +++ psyco/v2/dist/c/i386/iencoding.c Sat Aug 1 00:27:42 2009 @@ -25,18 +25,6 @@ /* following ESI: */ REG_386_EDI, /* following EDI: */ REG_386_EAX }; -#if 0 /* disabled */ - -code_t* psyco_emergency_jump(PsycoObject *po, code_t* code) -{ - STACK_CORRECTION(INITIAL_STACK_DEPTH - po->stack_depth); /* at most 6 bytes */ - code[0] = 0xE9; /* JMP rel32 */ - code += 5; - *(long*)(code-4) = ((code_t*)(&PyErr_NoMemory)) - code; - /* total: at most 11 bytes. Check the value of EMERGENCY_PROXY_SIZE. 
*/ - return code; -} -#endif void* psyco_call_code_builder(PsycoObject *po, void* fn, int restore, RunTimeSource extraarg) @@ -44,7 +32,7 @@ code_t* code = po->code; void* result; code_t* fixvalue; -#ifdef __APPLE__ +#if APPLE_ALIGNMENT int aligndelta; #endif @@ -110,14 +98,14 @@ int i, initial_depth; Source* p; bool ccflags; -#ifdef __APPLE__ +#if APPLE_ALIGNMENT int aligncount=0; #endif BEGIN_CODE /* cannot use NEED_CC(): it might clobber one of the registers mentioned in argsources */ ccflags = HAS_CCREG(po); -#ifdef __APPLE__ +#if APPLE_ALIGNMENT /* Calculate number of registers that will be pushed by NEED_REGISTER */ for (i=0; i<REG_TOTAL; i++) @@ -613,95 +601,3 @@ return new_rtvinfo(po, rg, false, is_nonneg(v->source)); } -#if 0 - -- unused -- -vinfo_t *integer_and_i(PsycoObject *po, vinfo_t *v1, long value2) - GENERIC_BINARY_INSTR_2(4, a & b, /* AND */ - value2>=0 || is_rtnonneg(v1->source)) -#define GENERIC_BINARY_INSTR_2(group, c_code, nonneg) \ -{ \ - if (!compute_vinfo(v1, po)) return NULL; \ - if (is_compiletime(v1->source)) \ - { \ - long a = CompileTime_Get(v1->source)->value; \ - long b = value2; \ - long c = (c_code); \ - return vinfo_new(CompileTime_New(c)); \ - } \ - else \ - { \ - reg_t rg; \ - BEGIN_CODE \ - NEED_CC(); \ - COPY_IN_REG(v1, rg); /* MOV rg, (v1) */ \ - COMMON_INSTR_IMMED(group, rg, value2); /* XXX rg, value2 */ \ - END_CODE \ - return new_rtvinfo(po, rg, false, nonneg); \ - } \ -} -#endif - -#if 0 - (not used) -vinfo_t *integer_seqindex(PsycoObject *po, vinfo_t *vi, vinfo_t *vn, bool ovf) -{ - NonVirtualSource vns, vis; - vns = vinfo_compute(vn, po); - if (vns == SOURCE_ERROR) return NULL; - vis = vinfo_compute(vi, po); - if (vis == SOURCE_ERROR) return NULL; - - if (!is_compiletime(vis)) - { - reg_t rg, tmprg; - BEGIN_CODE - NEED_CC_SRC(vis); - NEED_FREE_REG(rg); - LOAD_REG_FROM_RT(vis, rg); - DELAY_USE_OF(rg); - NEED_FREE_REG(tmprg); - - /* Increase 'rg' by 'vns' unless it is already in the range(0, vns). 
*/ - /* CMP i, n */ - vns = vn->source; /* reload, could have been moved by NEED_FREE_REG */ - COMMON_INSTR_FROM(7, rg, vns); - /* SBB t, t */ - COMMON_INSTR_FROM_RT(3, tmprg, RunTime_New(tmprg, false...)); - /* AND t, n */ - COMMON_INSTR_FROM(4, tmprg, vns); - /* SUB i, t */ - COMMON_INSTR_FROM_RT(5, rg, RunTime_New(tmprg, false...)); - /* ADD i, n */ - COMMON_INSTR_FROM(0, rg, vns); - END_CODE - - if (ovf && runtime_condition_f(po, CC_NB)) /* if out of range */ - return NULL; - return new_rtvinfo(po, rg, false...); - } - else - { - long index = CompileTime_Get(vis)->value; - long reqlength; - if (index >= 0) - reqlength = index; /* index is known, length must be greater than it */ - else - reqlength = ~index; /* idem for negative index */ - if (ovf) - { - /* test for out of range index -- more precisely, test that the - length is not large enough for the known index */ - condition_code_t cc = integer_cmp_i(po, vn, reqlength, Py_LE); - if (cc == CC_ERROR || runtime_condition_f(po, cc)) - return NULL; - } - if (index >= 0) - { - vinfo_incref(vi); - return vi; - } - else - return integer_add_i(po, vn, index...); - } -} -#endif /* 0 */ Modified: psyco/v2/dist/c/i386/iencoding.h ============================================================================== --- psyco/v2/dist/c/i386/iencoding.h (original) +++ psyco/v2/dist/c/i386/iencoding.h Sat Aug 1 00:27:42 2009 @@ -18,12 +18,13 @@ (a quite minor overhead). Set to 0 to disable. No effect on real optimizations. 
*/ #ifndef COMPACT_ENCODING -#ifdef __APPLE__ -/* COMPACT_ENCODING not yet supported on MacOS X */ -# define COMPACT_ENCODING 0 -#else # define COMPACT_ENCODING 1 #endif + +#if defined(__APPLE__) || defined(DEBUG_ALIGNMENT) +# define APPLE_ALIGNMENT 1 +#else +# define APPLE_ALIGNMENT 0 #endif /* Define to 0 to use EBP as any other register, or to 1 to reserve it */ @@ -205,24 +206,6 @@ (((unsigned char)(b3))<<16) | (((unsigned char)(b4))<<24)) /* note: the following macro starts writing at code+1 */ -#if 0 -/* access stack by [EBP-n] where 'n' is fixed for the variable */ -#define MODRM_EBP_BASE(middle, stack_pos) do { \ - extra_assert(0 < (stack_pos) && (stack_pos) <= RUNTIME_STACK_MAX); \ - if (COMPACT_ENCODING && (stack_pos) <= 128) \ - { \ - code[1] = 0x45 | (middle); \ - code[2] = -(stack_pos); \ - code += 3; \ - } \ - else \ - { \ - code[1] = 0x85 | (middle); \ - *(long*)(code+2) = -(stack_pos); \ - code += 6; \ - } \ -} while (0) -#else /* access stack by [ESP+n] where 'n' varies depending on the current ESP */ #define MODRM_EBP_BASE(middle, stack_pos) do { \ int _s_p = po->stack_depth - (stack_pos); \ @@ -247,7 +230,6 @@ code += 7; \ } \ } while (0) -#endif /* Emit instruction 'opcode' having a mod/rm as its second byte. Insert 'middle' in the mod/rm. Let the mod/rm point to the given stack_pos. */ @@ -646,7 +628,7 @@ JUMP_TO((code_t*)(target)); \ } while (0) -#ifdef __APPLE__ +#if APPLE_ALIGNMENT /* Stack alignment for MacOS X IA-32 ABI */ #define CALL_STACK_ALIGN_DELTA(nbargs, delta) do { \ int sp = po->stack_depth-INITIAL_STACK_DEPTH+(nbargs)*4; \ @@ -711,71 +693,75 @@ } \ } while (0) +/* disable compact encoding if we need alignment. 
+ Only relevant in psyco_call_code_builder */ +#define _COMPACT_ENCODING_OK (COMPACT_ENCODING && !APPLE_ALIGNMENT) + /* saving and restoring the registers currently in use (see also SAVE_REGS_FN_CALLS) */ -#define TEMP_SAVE_REGS_FN_CALLS do { \ - if (COMPACT_ENCODING) { \ - if (REG_NUMBER(po, REG_386_EAX) != NULL) PUSH_REG(REG_386_EAX); \ - if (REG_NUMBER(po, REG_386_ECX) != NULL) PUSH_REG(REG_386_ECX); \ - if (REG_NUMBER(po, REG_386_EDX) != NULL) PUSH_REG(REG_386_EDX); \ - if (HAS_CCREG(po)) PUSH_CC_FLAGS(); \ - } \ - else { \ - CODE_FOUR_BYTES(code, \ - PUSH_REG_INSTR(REG_386_EAX), \ - PUSH_REG_INSTR(REG_386_ECX), \ - PUSH_REG_INSTR(REG_386_EDX), \ - PUSH_CC_FLAGS_INSTR); \ - code += 4; \ - } \ +#define TEMP_SAVE_REGS_FN_CALLS do { \ + if (_COMPACT_ENCODING_OK) { \ + if (REG_NUMBER(po, REG_386_EAX) != NULL) PUSH_REG(REG_386_EAX); \ + if (REG_NUMBER(po, REG_386_ECX) != NULL) PUSH_REG(REG_386_ECX); \ + if (REG_NUMBER(po, REG_386_EDX) != NULL) PUSH_REG(REG_386_EDX); \ + if (HAS_CCREG(po)) PUSH_CC_FLAGS(); \ + } \ + else { \ + CODE_FOUR_BYTES(code, \ + PUSH_REG_INSTR(REG_386_EAX), \ + PUSH_REG_INSTR(REG_386_ECX), \ + PUSH_REG_INSTR(REG_386_EDX), \ + PUSH_CC_FLAGS_INSTR); \ + code += 4; \ + } \ } while (0) -#define TEMP_RESTORE_REGS_FN_CALLS do { \ - if (COMPACT_ENCODING) { \ - if (HAS_CCREG(po)) POP_CC_FLAGS(); \ - if (REG_NUMBER(po, REG_386_EDX) != NULL) POP_REG(REG_386_EDX); \ - if (REG_NUMBER(po, REG_386_ECX) != NULL) POP_REG(REG_386_ECX); \ - if (REG_NUMBER(po, REG_386_EAX) != NULL) POP_REG(REG_386_EAX); \ - } \ - else { \ - CODE_FOUR_BYTES(code, \ - POP_CC_FLAGS_INSTR, \ - POP_REG_INSTR(REG_386_EDX), \ - POP_REG_INSTR(REG_386_ECX), \ - POP_REG_INSTR(REG_386_EAX)); \ - code += 4; \ - } \ +#define TEMP_RESTORE_REGS_FN_CALLS do { \ + if (_COMPACT_ENCODING_OK) { \ + if (HAS_CCREG(po)) POP_CC_FLAGS(); \ + if (REG_NUMBER(po, REG_386_EDX) != NULL) POP_REG(REG_386_EDX); \ + if (REG_NUMBER(po, REG_386_ECX) != NULL) POP_REG(REG_386_ECX); \ + if (REG_NUMBER(po, 
REG_386_EAX) != NULL) POP_REG(REG_386_EAX); \ + } \ + else { \ + CODE_FOUR_BYTES(code, \ + POP_CC_FLAGS_INSTR, \ + POP_REG_INSTR(REG_386_EDX), \ + POP_REG_INSTR(REG_386_ECX), \ + POP_REG_INSTR(REG_386_EAX)); \ + code += 4; \ + } \ } while (0) /* same as above, but concludes with a JMP *EAX */ -#define TEMP_RESTORE_REGS_FN_CALLS_AND_JUMP do { \ - if (COMPACT_ENCODING) { \ - if (HAS_CCREG(po)) POP_CC_FLAGS(); \ - if (REG_NUMBER(po, REG_386_EDX) != NULL) POP_REG(REG_386_EDX); \ - if (REG_NUMBER(po, REG_386_ECX) != NULL) POP_REG(REG_386_ECX); \ - } \ - else { \ - CODE_FOUR_BYTES(code, \ - POP_CC_FLAGS_INSTR, \ - POP_REG_INSTR(REG_386_EDX), \ - POP_REG_INSTR(REG_386_ECX), \ - 0 /* dummy */); \ - code += 3; \ - } \ - if (!COMPACT_ENCODING || REG_NUMBER(po, REG_386_EAX) != NULL) { \ - /* must restore EAX, but it contains the jump target... */ \ - CODE_FOUR_BYTES(code, \ - 0x87, \ - 0x04, \ - 0x24, /* XCHG EAX, [ESP] */ \ - 0xC3); /* RET */ \ - code += 4; \ - } \ - else { \ - code[0] = 0xFF; \ - code[1] = 0xE0; /* JMP *EAX */ \ - code += 2; \ - } \ +#define TEMP_RESTORE_REGS_FN_CALLS_AND_JUMP do { \ + if (_COMPACT_ENCODING_OK) { \ + if (HAS_CCREG(po)) POP_CC_FLAGS(); \ + if (REG_NUMBER(po, REG_386_EDX) != NULL) POP_REG(REG_386_EDX); \ + if (REG_NUMBER(po, REG_386_ECX) != NULL) POP_REG(REG_386_ECX); \ + } \ + else { \ + CODE_FOUR_BYTES(code, \ + POP_CC_FLAGS_INSTR, \ + POP_REG_INSTR(REG_386_EDX), \ + POP_REG_INSTR(REG_386_ECX), \ + 0 /* dummy */); \ + code += 3; \ + } \ + if (!_COMPACT_ENCODING_OK || REG_NUMBER(po, REG_386_EAX) != NULL) { \ + /* must restore EAX, but it contains the jump target... 
*/ \ + CODE_FOUR_BYTES(code, \ + 0x87, \ + 0x04, \ + 0x24, /* XCHG EAX, [ESP] */ \ + 0xC3); /* RET */ \ + code += 4; \ + } \ + else { \ + code[0] = 0xFF; \ + code[1] = 0xE0; /* JMP *EAX */ \ + code += 2; \ + } \ } while (0) /* put an immediate value in memory */ @@ -1099,4 +1085,35 @@ #define ALIGN_NO_FILL() ALIGN_PAD_CODE_PTR() #endif +/* hunting alignment bugs */ +#if DEBUG_ALIGNMENT +#define CHECK_ALIGNMENT_ENTERING() do { \ + /* we are entering with the pushed ret-addr */ \ + PUSH_CC_FLAGS(); \ + PUSH_CC_FLAGS(); \ + PUSH_CC_FLAGS(); \ + /* we should now be 16 byte aligned */ \ + code[0] = 0xf7; \ + code[1] = 0xc4; /* TEST ESP, 0xf */ \ + code += 2; \ + CODE_FOUR_BYTES(code, 0xf, 0, 0, 0); \ + code += 4; \ + code[0] = 0x74; /* JZ +1 */ \ + code[1] = 0x01; \ + code[2] = 0xcc; /* INT 3 */ \ + code += 3; \ + POP_CC_FLAGS(); \ + POP_CC_FLAGS(); \ + POP_CC_FLAGS(); \ +} while (0) +#define CHECK_ALIGNMENT_CALLING() do { \ + PUSH_CC_FLAGS(); \ + CHECK_ALIGNMENT_ENTERING(); \ + POP_CC_FLAGS(); \ +} while (0) +#else +#define CHECK_ALIGNMENT_ENTERING() +#define CHECK_ALIGNMENT_CALLING() +#endif + #endif /* _IENCODING_H */ Modified: psyco/v2/dist/c/i386/iprocessor.c ============================================================================== --- psyco/v2/dist/c/i386/iprocessor.c (original) +++ psyco/v2/dist/c/i386/iprocessor.c Sat Aug 1 00:27:42 2009 @@ -3,45 +3,100 @@ #include "../dispatcher.h" #include "../codemanager.h" #include "../Python/frames.h" +#include "ipyencoding.h" -/* define to copy static machine code in the heap before running it. - I've seen some Linux distributions in which the static data pages - are not executable by default. */ -#define COPY_CODE_IN_HEAP - -/* glue code for psyco_processor_run(). 
*/ -static code_t glue_run_code[] = { - 0x8B, 0x44, 0x24, 4, /* MOV EAX, [ESP+4] (code target) */ - 0x8B, 0x4C, 0x24, 8, /* MOV ECX, [ESP+8] (stack end) */ - 0x8B, 0x54, 0x24, 12, /* MOV EDX, [ESP+12] (initial stack) */ - PUSH_REG_INSTR(REG_386_EBP), /* PUSH EBP */ - PUSH_REG_INSTR(REG_386_EBX), /* PUSH EBX */ - PUSH_REG_INSTR(REG_386_ESI), /* PUSH ESI */ - PUSH_REG_INSTR(REG_386_EDI), /* PUSH EDI */ - 0x8B, 0x5C, 0x24, 32, /* MOV EBX, [ESP+32] (finfo frame stack ptr) */ -#ifdef __APPLE__ - /* Align stack on 16-byte boundary for MacOS X */ - 0x83, 0xEC, 8, /* SUB ESP, 8 */ +/* About alignment on Mac/Os X + --------------------------- + + The stack layout on Os X is a bit special. + See http://developer.apple.com/documentation/developertools/Conceptual/LowLevelABI/130-IA-32_Function_Calling_Conventions/IA32.html + All function calls are arranged in a way that at the exact location + of the call instruction, the stack addresses are all zero mod 16. + This is so because Os X uses the mmx registers, which need aligned + stack positions. + + Os X always keeps this alignment implicitly, assuming that everything + started with correct alignment. In order to simulate that on other + platforms (for easy debugging), it is necessary to add extra code + to round the stack pointer down. + + Another important fact is the calling convention. A call to a function + pushes the return address (4 bytes) on the stack. On the Mac, there + is always one word more allocated for the frame pointer, right below + the return address. + This is true, even when compiling with -fomit-frame-pointer ! + + So the stack overhead of a function call is always 8 on a Mac, + and in order to keep the stack aligned, we have to subtract + another 8 from the stack pointer. 
+*/ + +#define C1(b1) (*code++ = (b1)) +#define C2(b1, b2) do { C1(b1); C1(b2); } while(0) +#define C3(b1, b2, b3) do { C2(b1, b2); C1(b3);} while(0) +#define C4(b1, b2, b3, b4) do { C2(b1, b2); C2(b3, b4); } while(0) +#define C5(b1, b2, b3, b4, b5) do { C2(b1, b2); C2(b3, b4); C1(b5); } while(0) +#define CW(w) do { *(long *) code = w; code += 4; } while(0) + +typedef code_t *(*build_fn)(code_t *buf); + +static code_t *compile_stub_argloop(code_t *code) +{ + C2(0x6A, -1); /* PUSH -1 */ + /* stack must be aligned right here! */ + C2(0x89, 0x23); /* MOV [EBX], ESP */ + C2(0xEB, +5); /* JMP Label2 */ + /* Label1: */ + C3(0x83, 0xE9, 4); /* SUB ECX, 4 */ + C2(0xFF, 0x31); /* PUSH [ECX] */ + /* Label2: */ + C2(0x39, 0xCA); /* CMP EDX, ECX */ + C2(0x75, -9); /* JNE Label1 */ + C2(0xFF, 0xD0); /* CALL *EAX (callee removes args) */ + + return code; +} + +static code_t * build_glue_code (code_t *code){ + C4(0x8B, 0x44, 0x24, 4); /* MOV EAX, [ESP+4] (code target) */ + C4(0x8B, 0x4C, 0x24, 8); /* MOV ECX, [ESP+8] (stack end) */ + C4(0x8B, 0x54, 0x24, 12); /* MOV EDX, [ESP+12] (initial stack) */ + PUSH_REG(REG_386_EBP); + PUSH_REG(REG_386_EBX); + PUSH_REG(REG_386_ESI); + PUSH_REG(REG_386_EDI); + C4(0x8B, 0x5C, 0x24, 32); /* MOV EBX, [ESP+32] + (finfo frame stack ptr) */ +#if APPLE_ALIGNMENT +# ifdef __APPLE__ + /* Align stack on 16-byte boundary for MacOS X */ + C3(0x83, 0xEC, 8); /* SUB ESP, 8 */ +# else + /* simulate Apple's alignment */ + C2(0x89, 0xE5); /* MOV EBP, ESP */ + C3(0x83, 0xEC, 12+8); /* SUB ESP, 12+8 */ + C2(0x81, 0xE4); CW(~12); /* AND ESP, ~12 */ + C3(0x83, 0xC4, 8); /* ADD ESP, 8 */ + PUSH_REG(REG_386_EBP); /* PUSH EBP */ + code = compile_stub_argloop(code); + POP_REG(REG_386_EBP); /* POP EBP */ + C2(0x89, 0xEC); /* MOV ESP, EBP */ + goto epilog; +# endif #endif - 0x6A, -1, /* PUSH -1 */ - 0x89, 0x23, /* MOV [EBX], ESP */ - 0xEB, +5, /* JMP Label2 */ - /* Label1: */ - 0x83, 0xE9, 4, /* SUB ECX, 4 */ - 0xFF, 0x31, /* PUSH [ECX] */ - /* Label2: */ - 0x39, 
0xCA, /* CMP EDX, ECX */ - 0x75, -9, /* JNE Label1 */ - 0xFF, 0xD0, /* CALL *EAX (callee removes args) */ + code = compile_stub_argloop(code); #ifdef __APPLE__ - /* Restore stack from 16-byte alignment on MacOS X */ - 0x83, 0xC4, 8, /* ADD ESP, 8 */ + /* Restore stack from 16-byte alignment on MacOS X */ + C3(0x83, 0xC4, 8); /* ADD ESP, 8 */ #endif - POP_REG_INSTR(REG_386_EDI), /* POP EDI */ - POP_REG_INSTR(REG_386_ESI), /* POP ESI */ - POP_REG_INSTR(REG_386_EBX), /* POP EBX */ - POP_REG_INSTR(REG_386_EBP), /* POP EBP */ - 0xC3, /* RET */ +epilog: + POP_REG(REG_386_EDI); + POP_REG(REG_386_ESI); + POP_REG(REG_386_EBX); + POP_REG(REG_386_EBP); + C1(0xC3); /* RET */ + + return code; }; typedef PyObject* (*glue_run_code_fn) (code_t* code_target, @@ -49,94 +104,94 @@ long* initial_stack, struct stack_frame_info_s*** finfo); -#ifdef COPY_CODE_IN_HEAP -static glue_run_code_fn glue_run_code_1; -#else -# define glue_run_code_1 ((glue_run_code_fn) glue_run_code) -#endif +static glue_run_code_fn glue_run_code; PyObject* psyco_processor_run(CodeBufferObject* codebuf, - long initial_stack[], - struct stack_frame_info_s*** finfo, - PyObject* tdict) + long initial_stack[], + struct stack_frame_info_s*** finfo, + PyObject* tdict) { int argc = RUN_ARGC(codebuf); - return glue_run_code_1(codebuf->codestart, initial_stack + argc, - initial_stack, finfo); + return glue_run_code(codebuf->codestart, initial_stack + argc, + initial_stack, finfo); } /* call a C function with a variable number of arguments */ long (*psyco_call_var) (void* c_func, int argcount, long arguments[]); -static code_t glue_call_var[] = { - 0x53, /* PUSH EBX */ - 0x8B, 0x5C, 0x24, 12, /* MOV EBX, [ESP+12] (argcount) */ - 0x8B, 0x44, 0x24, 8, /* MOV EAX, [ESP+8] (c_func) */ -#ifdef __APPLE__ +/* Note: + This code is called for functions which have CfPure set. + They are called at compile time, only. Therefore, the support + code is not time critical. 
+ For simulating correct alignment on non-Mac/Os platforms, extra + effort is needed to align the stack at the call. + We simplify matters by always aligning the call to 16 bytes, + regardless which platform. + */ + +static code_t *build_call_var(code_t *code) +{ + PUSH_REG(REG_386_EBX); + C4(0x8B, 0x54, 0x24, 16); /* MOV EDX, [ESP+16] (arguments) */ + C4(0x8B, 0x5C, 0x24, 12); /* MOV EBX, [ESP+12] (argcount) */ + C4(0x8B, 0x44, 0x24, 8); /* MOV EAX, [ESP+8] (c_func) */ + /* Adjust # of arguments for MacOS 16-byte stack alignment */ - 0x83, 0xC3, 3, /* ADD EBX, 3 */ - 0x83, 0xE3, ~3, /* AND EBX, ~3 */ - /* Align stack on 16-byte boundary for MacOS X */ - 0x83, 0xEC, 8, /* SUB ESP, 8 */ -#endif - 0x09, 0xDB, /* OR EBX, EBX */ - 0x74, +16, /* JZ Label1 */ -#ifdef __APPLE__ - /* Arguments are 8 bytes further up stack on MacOS X */ - 0x8B, 0x54, 0x24, 24, /* MOV EDX, [ESP+24] (arguments) */ -#else - 0x8B, 0x54, 0x24, 16, /* MOV EDX, [ESP+16] (arguments) */ -#endif - 0x8D, 0x0C, 0x9A, /* LEA ECX, [EDX+4*EBX] */ - /* Label2: */ - 0x83, 0xE9, 4, /* SUB ECX, 4 */ - 0xFF, 0x31, /* PUSH [ECX] */ - 0x39, 0xCA, /* CMP EDX, ECX */ - 0x75, -9, /* JNE Label2 */ - /* Label1: */ - 0xFF, 0xD0, /* CALL *EAX */ -#ifdef __APPLE__ - /* Restore stack from 16-byte alignment on MacOS X */ - 0x83, 0xC4, 8, /* ADD ESP, 8 */ -#endif - 0x8D, 0x24, 0x9C, /* LEA ESP, [ESP+4*EBX] */ - 0x5B, /* POP EBX */ - 0xC3, /* RET */ + C3(0x83, 0xC3, 3); /* ADD EBX, 3 */ + C3(0x83, 0xE3, ~3); /* AND EBX, ~3 */ + + /* align the stack, regardless of platform */ + PUSH_REG(REG_386_EBP); /* PUSH EBP */ + C2(0x89, 0xE5); /* MOV EBP, ESP */ + C3(0x83, 0xEC, 12); /* SUB ESP, 12 */ + C2(0x81, 0xE4); CW(~12); /* AND ESP, ~12 */ + + /* supply the arguments and call */ + C2(0x09, 0xDB); /* OR EBX, EBX */ + C2(0x74, +12); /* JZ Label1 */ + C3(0x8D, 0x0C, 0x9A); /* LEA ECX, [EDX+4*EBX] */ + /* Label2: */ + C3(0x83, 0xE9, 4); /* SUB ECX, 4 */ + C2(0xFF, 0x31); /* PUSH [ECX] */ + C2(0x39, 0xCA); /* CMP EDX, ECX */ + 
C2(0x75, -9); /* JNE Label2 */ + /* Label1: */ + C2(0xFF, 0xD0); /* CALL *EAX */ + + /* restore the stack */ + C2(0x89, 0xEC); /* MOV ESP, EBP */ + POP_REG(REG_386_EBP); /* POP EBP */ + POP_REG(REG_386_EBX); /* POP EBX */ + C1(0xC3); /* RET */ + + return code; }; /* check for signed integer multiplication overflow */ char (*psyco_int_mul_ovf) (long a, long b); -static code_t glue_int_mul[] = { - 0x8B, 0x44, 0x24, 8, /* MOV EAX, [ESP+8] (a) */ - 0x0F, 0xAF, 0x44, 0x24, 4, /* IMUL EAX, [ESP+4] (b) */ - 0x0F, 0x90, 0xC0, /* SETO AL */ - 0xC3, /* RET */ -}; +static code_t *build_int_mul(code_t *code) +{ + C4(0x8B, 0x44, 0x24, 8); /* MOV EAX, [ESP+8] (a) */ + C5(0x0F, 0xAF, 0x44, 0x24, 4); /* IMUL EAX, [ESP+4] (b) */ + C3(0x0F, 0x90, 0xC0); /* SETO AL */ + C1(0xC3); /* RET */ + + return code; +} -#ifdef COPY_CODE_IN_HEAP -static code_t* internal_copy_code(void* source, int size) { +static code_t *internal_build_code(build_fn func) +{ CodeBufferObject* codebuf = psyco_new_code_buffer(NULL, NULL, NULL); code_t* code = codebuf->codestart; - memcpy(code, source, size); - SHRINK_CODE_BUFFER(codebuf, code+size, "glue"); + code_t* code_end = func(code); + SHRINK_CODE_BUFFER(codebuf, code_end, "glue"); return code; } -# define COPY_CODE(target, source, type) do { \ - target = (type) internal_copy_code(source, sizeof(source)); \ +# define BUILD_CODE(target, func, type) do { \ + target = (type) internal_build_code(func); \ } while (0) -#else -# define COPY_CODE(target, source, type) (target = (type) source) -#endif -void psyco_processor_init(void) -{ -#ifdef COPY_CODE_IN_HEAP - COPY_CODE(glue_run_code_1, glue_run_code, glue_run_code_fn); -#endif - COPY_CODE(psyco_int_mul_ovf, glue_int_mul, char(*)(long, long)); - COPY_CODE(psyco_call_var, glue_call_var, long(*)(void*, int, long[])); -} struct stack_frame_info_s** psyco_next_stack_frame(struct stack_frame_info_s** finfo) @@ -146,3 +201,38 @@ return (struct stack_frame_info_s**) (((char*) finfo) - 
finfo_last(*finfo)->link_stack_depth); } + +code_t *finalize_frame_locals_helper(code_t *code) +{ + code_t *here; + + C4(0x83, 0x3C, 0x24, 0); /* CMP [ESP], 0 */ + C2(0x70 | CC_E, 0); /* JE exit */ + here = code; + PUSH_REG(REG_386_EBP); /* PUSH EBP */ + C2(0x89, 0xE5); /* MOV EBP, ESP */ + C3(0x83, 0xEC, 12); /* SUB ESP, 12 */ + C2(0x81, 0xE4); CW(~12); /* AND ESP, ~12 */ + C3(0x83, 0xEC, 4); /* SUB ESP, 4 */ + /* cannot push indirect via EBP, use ECX */ + C3(0x8B, 0x4D, 12); /* MOV ECX, [EBP+12] */ + PUSH_REG(REG_386_ECX); /* PUSH ECX */ + C3(0x8B, 0x4D, 8); /* MOV ECX, [EBP+8] */ + PUSH_REG(REG_386_ECX); /* PUSH ECX */ + C3(0x8B, 0x4D, 4); /* MOV ECX, [EBP+4] */ + PUSH_REG(REG_386_ECX); /* PUSH ECX */ + C1(0xE8); /* CALL cimpl_finalize_frame_locals */ + CW((code_t*)(&cimpl_finalize_frame_locals) - (code +4)); + C2(0x89, 0xEC); /* MOV ESP, EBP */ + POP_REG(REG_386_EBP); /* POP EBP */ + /* exit: */ + *(here-1) = code - here; + return code; +}; + +void psyco_processor_init(void) +{ + BUILD_CODE(glue_run_code, build_glue_code, glue_run_code_fn); + BUILD_CODE(psyco_int_mul_ovf, build_int_mul, char(*)(long, long)); + BUILD_CODE(psyco_call_var, build_call_var, long(*)(void*, int, long[])); +} Modified: psyco/v2/dist/c/i386/ipyencoding.c ============================================================================== --- psyco/v2/dist/c/i386/ipyencoding.c (original) +++ psyco/v2/dist/c/i386/ipyencoding.c Sat Aug 1 00:27:42 2009 @@ -2,67 +2,75 @@ #include "../pycodegen.h" code_t* decref_dealloc_calling(code_t* code, PsycoObject *po, reg_t rg, - destructor fn) + destructor fn) { - code_t* code_origin; - int save_eax, save_ecx, save_edx; - reg_t last_reg; - DEC_OB_REFCNT_NZ(rg); - extra_assert(offsetof(PyObject, ob_type) < 128); - extra_assert(offsetof(PyTypeObject, tp_dealloc) < 128); - code[0] = 0x75; /* JNZ rel8 */ - code += 2; - code_origin = code; - if (COMPACT_ENCODING) { - save_eax = REG_NUMBER(po, REG_386_EAX) != NULL; - save_ecx = REG_NUMBER(po, REG_386_ECX) != 
NULL; - save_edx = REG_NUMBER(po, REG_386_EDX) != NULL; - last_reg = REG_386_EAX; - if (save_eax) PUSH_REG(REG_386_EAX); - if (save_ecx) { PUSH_REG(REG_386_ECX); last_reg = REG_386_ECX; } - if (save_edx) { PUSH_REG(REG_386_EDX); last_reg = REG_386_EDX; } - PUSH_REG(rg); - } - else { - CODE_FOUR_BYTES(code, - PUSH_REG_INSTR(REG_386_EAX), - PUSH_REG_INSTR(REG_386_ECX), - PUSH_REG_INSTR(REG_386_EDX), - PUSH_REG_INSTR(rg)); - code += 4; - } - if (fn == NULL) { - code[0] = 0x8B; /* MOV EAX, [reg+ob_type] */ - code[1] = 0x40 | (rg); - CODE_FOUR_BYTES(code+2, - offsetof(PyObject, ob_type), - 0xFF, /* CALL [EAX+tp_dealloc] */ - 0x50, - offsetof(PyTypeObject, tp_dealloc)); - code += 6; - } - else { - code[0] = 0xE8; /* CALL */ - code += 5; - *(long*)(code-4) = (code_t*)(fn) - code; - } - if (COMPACT_ENCODING) { - POP_REG(last_reg); /* pop argument back */ - if (save_edx) POP_REG(REG_386_EDX); - if (save_ecx) POP_REG(REG_386_ECX); - if (save_eax) POP_REG(REG_386_EAX); - } - else { - CODE_FOUR_BYTES(code, - POP_REG_INSTR(REG_386_EDX), - POP_REG_INSTR(REG_386_EDX), - POP_REG_INSTR(REG_386_ECX), - POP_REG_INSTR(REG_386_EAX)); - code += 4; - } - extra_assert(code-code_origin < 128); - code_origin[-1] = (code_t)(code-code_origin); - return code; + code_t* code_origin; + int save_eax = 1, save_ecx = 1, save_edx = 1; + reg_t last_reg; + int aligndelta, aligncount; + + DEC_OB_REFCNT_NZ(rg); + extra_assert(offsetof(PyObject, ob_type) < 128); + extra_assert(offsetof(PyTypeObject, tp_dealloc) < 128); + code[0] = 0x75; /* JNZ rel8 */ + code += 2; + code_origin = code; + if (COMPACT_ENCODING) { + save_eax = REG_NUMBER(po, REG_386_EAX) != NULL; + save_ecx = REG_NUMBER(po, REG_386_ECX) != NULL; + save_edx = REG_NUMBER(po, REG_386_EDX) != NULL; + } + aligncount = save_eax + save_ecx + save_edx; + CALL_STACK_ALIGN_DELTA(1 + aligncount, aligndelta); + if (COMPACT_ENCODING) { + last_reg = REG_386_EAX; + if (save_eax) PUSH_REG(REG_386_EAX); + if (save_ecx) { PUSH_REG(REG_386_ECX); last_reg = 
REG_386_ECX; } + if (save_edx) { PUSH_REG(REG_386_EDX); last_reg = REG_386_EDX; } + PUSH_REG(rg); + } + else { + CODE_FOUR_BYTES(code, + PUSH_REG_INSTR(REG_386_EAX), + PUSH_REG_INSTR(REG_386_ECX), + PUSH_REG_INSTR(REG_386_EDX), + PUSH_REG_INSTR(rg)); + code += 4; + } + CHECK_ALIGNMENT_CALLING(); + if (fn == NULL) { + code[0] = 0x8B; /* MOV EAX, [reg+ob_type] */ + code[1] = 0x40 | (rg); + CODE_FOUR_BYTES(code+2, + offsetof(PyObject, ob_type), + 0xFF, /* CALL [EAX+tp_dealloc] */ + 0x50, + offsetof(PyTypeObject, tp_dealloc)); + code += 6; + } + else { + code[0] = 0xE8; /* CALL */ + code += 5; + *(long*)(code-4) = (code_t*)(fn) - code; + } + if (COMPACT_ENCODING) { + POP_REG(last_reg); /* pop argument back */ + if (save_edx) POP_REG(REG_386_EDX); + if (save_ecx) POP_REG(REG_386_ECX); + if (save_eax) POP_REG(REG_386_EAX); + } + else { + CODE_FOUR_BYTES(code, + POP_REG_INSTR(REG_386_EDX), + POP_REG_INSTR(REG_386_EDX), + POP_REG_INSTR(REG_386_ECX), + POP_REG_INSTR(REG_386_EAX)); + code += 4; + } + CALL_STACK_ALIGN_RESTORE(aligndelta); + extra_assert(code-code_origin < 128); + code_origin[-1] = (code_t)(code-code_origin); + return code; } void decref_create_new_ref(PsycoObject *po, vinfo_t *w) @@ -94,7 +102,7 @@ extra_assert(!RUNTIME_REG_IS_NONE(w)); INC_OB_REFCNT(RUNTIME_REG(w)); } - END_CODE + END_CODE } return could_eat; } Modified: psyco/v2/dist/c/i386/ipyencoding.h ============================================================================== --- psyco/v2/dist/c/i386/ipyencoding.h (original) +++ psyco/v2/dist/c/i386/ipyencoding.h Sat Aug 1 00:27:42 2009 @@ -291,21 +291,30 @@ } while (0) /* called by psyco_finish_return() */ -#define FINALIZE_FRAME_LOCALS(nframelocal) do { \ - CODE_FOUR_BYTES(code, \ - 0x83, \ - 0x3C, /* CMP [ESP], 0 */ \ - 0x24, \ - 0); \ - code[4] = 0x70 | CC_E; /* JE exit */ \ - code[5] = 11 - 6; \ - code[6] = 0xE8; /* CALL cimpl_finalize_frame_locals */ \ - code += 11; \ - *(long*)(code-4) = (code_t*)(&cimpl_finalize_frame_locals) - code; \ +#if 
APPLE_ALIGNMENT + /* The stack is not aligned in this context. + implemented in iprocessor.c */ +code_t *finalize_frame_locals_helper(code_t *code); + +# define FINALIZE_FRAME_LOCALS(nframelocal) \ + code = finalize_frame_locals_helper(code) +#else +# define FINALIZE_FRAME_LOCALS(nframelocal) do { \ + CODE_FOUR_BYTES(code, \ + 0x83, \ + 0x3C, /* CMP [ESP], 0 */ \ + 0x24, \ + 0); \ + code[4] = 0x70 | CC_E; /* JE exit */ \ + code[5] = 11 - 6; \ + code[6] = 0xE8; /* CALL cimpl_finalize_frame_locals */ \ + code += 11; \ + *(long*)(code-4) = (code_t*)(&cimpl_finalize_frame_locals) - code; \ } while (0) +#endif -#define WRITE_FRAME_EPILOGUE(retval, nframelocal) do { \ - /* load the return value into EAX for regular functions, EBX for functions \ +#define WRITE_FRAME_EPILOGUE(retval, nframelocal) do { \ + /* load the return value into EAX for regular functions, EBX for functions \ with a prologue */ \ if (retval != SOURCE_DUMMY) { \ reg_t rg = nframelocal>0 ? REG_ANY_CALLEE_SAVED : REG_FUNCTIONS_RETURN; \ Modified: psyco/v2/dist/c/psyco.h ============================================================================== --- psyco/v2/dist/c/psyco.h (original) +++ psyco/v2/dist/c/psyco.h Sat Aug 1 00:27:42 2009 @@ -18,7 +18,11 @@ # endif #endif -#define COMPARE_CODE_BY_IDENTITY 0 +#ifndef DEBUG_ALIGNMENT +# define DEBUG_ALIGNMENT 0 +#endif + +#define COMPARE_CODE_BY_IDENTITY 0 /* set to 0 to disable all debugging checks and output */ #ifndef PSYCO_DEBUG Modified: psyco/v2/dist/py-support/core.py ============================================================================== --- psyco/v2/dist/py-support/core.py (original) +++ psyco/v2/dist/py-support/core.py Sat Aug 1 00:27:42 2009 @@ -44,7 +44,7 @@ Mode is 'a' to append to a possibly existing file or 'w' to overwrite an existing file. 
Note that the log file may grow quickly in 'a' mode.""" - import profiler, logger + import logger if not logfile: import os logfile, dummy = os.path.splitext(sys.argv[0]) @@ -58,7 +58,6 @@ # in Python, as Psyco will invoke them while compiling. logger.current = open(logfile, mode) logger.print_charges = top - profiler.logger = logger logger.writedate('Logging started') cannotcompile(logger.psycowrite) _psyco.statwrite(logger=logger.psycowrite) Modified: psyco/v2/dist/test/fulltester.py ============================================================================== --- psyco/v2/dist/test/fulltester.py (original) +++ psyco/v2/dist/test/fulltester.py Sat Aug 1 00:27:42 2009 @@ -31,7 +31,7 @@ print >> sys.stderr, "list of Python executables of various versions." sys.exit(2) -PYTHON_VERSIONS = [s.strip() for s in f.readlines()] +PYTHON_VERSIONS = [s.strip() for s in f.readlines() if s and s.strip()[:1] != '#'] f.close() PSYCO_MODES = [ |