From: <ebo...@us...> - 2003-07-16 19:43:33
|
Update of /cvsroot/alleg/allegro/src/i386 In directory sc8-pr-cvs1:/tmp/cvs-serv3637/src/i386 Modified Files: blit.inc iblit16.s iblit32.s iblit8.s iscan.s iscanmmx.s ispr8.s izbuf.s Log Message: Henrik removed a GNUish preprocessor construct. Index: blit.inc =================================================================== RCS file: /cvsroot/alleg/allegro/src/i386/blit.inc,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- blit.inc 12 Jul 2002 22:51:15 -0000 1.2 +++ blit.inc 16 Jul 2003 19:43:28 -0000 1.3 @@ -22,7 +22,7 @@ /* generic framework for constructing blitting routines, shared - * between the 8, 16, 24, and 32 bit versions of the code... + * between the 8, 16, 24, and 32 bit versions of the code. */ @@ -37,7 +37,7 @@ -#define BLIT_LOOP(name, bpp, code...) \ +#define BLIT_LOOP(name, bpp, code) \ blit_loop_##name: ; \ movl B_DEST, %edx /* destination bitmap */ ; \ movl B_DEST_Y, %eax /* line number */ ; \ Index: iblit16.s =================================================================== RCS file: /cvsroot/alleg/allegro/src/i386/iblit16.s,v retrieving revision 1.12 retrieving revision 1.13 diff -u -d -r1.12 -r1.13 --- iblit16.s 12 Jul 2002 22:51:15 -0000 1.12 +++ iblit16.s 16 Jul 2003 19:43:28 -0000 1.13 @@ -333,78 +333,81 @@ _align_ blit_mmxlong_long: /* blit longs64 and long */ - BLIT_LOOP(long_longsmmx, 2, - even_llmmx_loop: - movq %ds:(%esi), %mm0 ; - addl $8, %esi ; - movq %mm0, %es:(%edi) ; - addl $8, %edi ; - decl %ecx ; - jnz even_llmmx_loop ; + #define BLIT_CODE \ + even_llmmx_loop: \ + movq %ds:(%esi), %mm0 ; \ + addl $8, %esi ; \ + movq %mm0, %es:(%edi) ; \ + addl $8, %edi ; \ + decl %ecx ; \ + jnz even_llmmx_loop ; \ movsl - ) + BLIT_LOOP(long_longsmmx, 2, BLIT_CODE) + #undef BLIT_CODE emms jmp blit_done _align_ blit_mmxlong_word: - BLIT_LOOP(word_longsmmx, 2, - even_wlmmx_loop: - movq %ds:(%esi), %mm0 ; - addl $8, %esi ; - movq %mm0, %es:(%edi) ; - addl $8, %edi ; - decl %ecx ; - jnz even_wlmmx_loop ; - movsl ; + #define BLIT_CODE \ + even_wlmmx_loop: \ + movq %ds:(%esi), %mm0 ; \ + addl $8, %esi ; \ + movq %mm0, %es:(%edi) ; \ + addl $8, %edi ; \ + decl %ecx ; \ + jnz even_wlmmx_loop ; \ + movsl ; \ movsw - ) + BLIT_LOOP(word_longsmmx, 2, BLIT_CODE) + #undef BLIT_CODE emms jmp blit_done _align_ blit_even_wmmxlongs: - BLIT_LOOP(word_wlongsmmx, 2, - even_wmmx_loop: - movq %ds:(%esi), %mm0 ; - addl $8, %esi ; - movq %mm0, %es:(%edi) ; - addl $8, %edi ; - decl %ecx ; - jnz even_wmmx_loop ; + #define BLIT_CODE \ + even_wmmx_loop: \ + movq %ds:(%esi), %mm0 ; \ + addl $8, %esi ; \ + movq %mm0, %es:(%edi) ; \ + addl $8, %edi ; \ + decl %ecx ; \ + jnz even_wmmx_loop ; \ movsw - ) + BLIT_LOOP(word_wlongsmmx, 2, BLIT_CODE) + #undef BLIT_CODE emms jmp blit_done _align_ blit_even_mmxlongs: - BLIT_LOOP(even_longsmmx, 2, - even_lmmx_loop: - movq %ds:(%esi), %mm0 ; - addl $8, %esi ; - movq %mm0, %es:(%edi) ; - addl $8, %edi ; - decl %ecx ; + #define BLIT_CODE \ + even_lmmx_loop: \ + movq %ds:(%esi), %mm0 ; \ + addl $8, %esi ; \ + movq %mm0, %es:(%edi) ; \ + addl $8, %edi ; \ + decl %ecx ; \ jnz even_lmmx_loop - ) + BLIT_LOOP(even_longsmmx, 2, BLIT_CODE) + #undef BLIT_CODE emms jmp blit_done _align_ blit_long_word: - BLIT_LOOP(long_word, 2, - movsl ; + #define BLIT_CODE \ + movsl ; \ movsw - ) + BLIT_LOOP(long_word, 2, BLIT_CODE) + #undef BLIT_CODE emms jmp blit_done _align_ blit_only_one_long: - BLIT_LOOP(only_one_wordmmx, 2, - movsl - ) + BLIT_LOOP(only_one_wordmmx, 2, movsl) emms jmp blit_done @@ -418,24 +421,24 @@ jnc blit_even_words _align_ - BLIT_LOOP(longs_and_word, 2, /* long at a time, plus leftover word */ - rep ; movsl ; + #define BLIT_CODE \ + rep ; movsl ; \ movsw - ) + BLIT_LOOP(longs_and_word, 2, BLIT_CODE) /* long at a time, plus leftover word */ + #undef BLIT_CODE jmp blit_done _align_ -blit_even_words: - BLIT_LOOP(even_words, 2, /* copy a long at a time */ +blit_even_words: + #define BLIT_CODE \ rep ; movsl - ) + BLIT_LOOP(even_words, 2, BLIT_CODE) /* copy a long at a time */ + #undef BLIT_CODE jmp blit_done _align_ blit_only_one_word: - BLIT_LOOP(only_one_word, 2, /* copy just the one word */ - movsw - ) + BLIT_LOOP(only_one_word, 2, movsw) /* copy just the one word */ _align_ blit_done: @@ -579,91 +582,93 @@ por %mm1, %mm0 pcmpeqd %mm4, %mm4 /* Create inverter mask */ - - BLIT_LOOP(masked16_mmx_loop, 2, - movd %ecx, %mm2; /* Save line length (%mm2) */ - shrl $3, %ecx; - movl V_MASK, %edx; /* Save 32 bit mask */ - - pushl %es; /* Swap ES and DS */ - pushl %ds; - popl %es; - popl %ds; - - _align_; - masked16_mmx_x_loop: - movq %es:(%esi), %mm1; /* Read 4 pixels */ - movq %mm0, %mm3; - movq %es:8(%esi), %mm5; /* Read 4 more pixels */ - movq %mm0, %mm6; - - pcmpeqw %mm1, %mm3; /* Compare with mask (%mm3/%mm6) */ - pcmpeqw %mm5, %mm6; - pxor %mm4, %mm3; /* Turn 1->0 and 0->1 */ - pxor %mm4, %mm6; - addl $16, %esi; /* Update src */ - maskmovq %mm3, %mm1; /* Write if not equal to mask. Note: maskmovq is an SSE instruction! */ - addl $8, %edi; - maskmovq %mm6, %mm5; - - addl $8, %edi; /* Update dest */ - - decl %ecx; /* Any pixel packs left for this line? */ - jnz masked16_mmx_x_loop; - - - movd %mm2, %ecx; /* Restore pixel count */ - andl $7, %ecx; - jz masked16_mmx_loop_end; /* Nothing else to do? */ - shrl $1, %ecx; /* 1 pixels left */ - jnc masked16_mmx_long; - - movw %es:(%esi), %ax; /* Read 1 pixel */ - addl $2, %esi; - addl $2, %edi; - cmpw %ax, %dx; /* Compare with mask */ - je masked16_mmx_long; - movw %ax, -2(%edi); /* Write the pixel */ - - masked16_mmx_long: - - shrl $1, %ecx; /* 2 pixels left */ - jnc masked16_mmx_qword; - - movl %es:(%esi), %eax; /* Read 2 pixels */ - addl $4, %esi; - addl $4, %edi; - cmpw %ax, %dx; /* Compare with mask */ - je masked16_mmx_long_2; - movw %ax, -4(%edi); /* Write pixel */ - - masked16_mmx_long_2: - shrl $16, %eax; - shrl $16, %edx; - cmpl %eax, %edx; - je masked16_mmx_qword; - movw %ax, -2(%edi); - - _align_; - masked16_mmx_qword: - shrl $1, %ecx; /* 4 pixels left */ - jnc masked16_mmx_loop_end; - - movq %es:(%esi), %mm1; /* Read 4 more pixels */ - movq %mm0, %mm3; - - pcmpeqw %mm1, %mm3; /* Compare with mask (%mm3, %mm6) */ - pxor %mm4, %mm3; /* Turn 1->0 and 0->1 */ - maskmovq %mm3, %mm1; /* Write if not equal to mask. Note: maskmovq is an SSE instruction! */ - - _align_; - masked16_mmx_loop_end: + /* ??? maskmovq is an SSE instruction! */ - pushl %ds; /* Swap back ES and DS */ + #define BLIT_CODE \ + movd %ecx, %mm2; /* Save line length (%mm2) */ \ + shrl $3, %ecx; \ + movl V_MASK, %edx; /* Save 32 bit mask */ \ + \ + pushl %es; /* Swap ES and DS */ \ + pushl %ds; \ + popl %es; \ + popl %ds; \ + \ + _align_; \ + masked16_mmx_x_loop: \ + \ + movq %es:(%esi), %mm1; /* Read 4 pixels */ \ + movq %mm0, %mm3; \ + movq %es:8(%esi), %mm5; /* Read 4 more pixels */ \ + movq %mm0, %mm6; \ + \ + pcmpeqw %mm1, %mm3; /* Compare with mask (%mm3/%mm6) */ \ + pcmpeqw %mm5, %mm6; \ + pxor %mm4, %mm3; /* Turn 1->0 and 0->1 */ \ + pxor %mm4, %mm6; \ + addl $16, %esi; /* Update src */ \ + maskmovq %mm3, %mm1; /* Write if not equal to mask. */ \ + addl $8, %edi; \ + maskmovq %mm6, %mm5; \ + \ + addl $8, %edi; /* Update dest */ \ + \ + decl %ecx; /* Any pixel packs left for this line? */ \ + jnz masked16_mmx_x_loop; \ + \ + movd %mm2, %ecx; /* Restore pixel count */ \ + andl $7, %ecx; \ + jz masked16_mmx_loop_end; /* Nothing else to do? */ \ + shrl $1, %ecx; /* 1 pixels left */ \ + jnc masked16_mmx_long; \ + \ + movw %es:(%esi), %ax; /* Read 1 pixel */ \ + addl $2, %esi; \ + addl $2, %edi; \ + cmpw %ax, %dx; /* Compare with mask */ \ + je masked16_mmx_long; \ + movw %ax, -2(%edi); /* Write the pixel */ \ + \ + masked16_mmx_long: \ + \ + shrl $1, %ecx; /* 2 pixels left */ \ + jnc masked16_mmx_qword; \ + \ + movl %es:(%esi), %eax; /* Read 2 pixels */ \ + addl $4, %esi; \ + addl $4, %edi; \ + cmpw %ax, %dx; /* Compare with mask */ \ + je masked16_mmx_long_2; \ + movw %ax, -4(%edi); /* Write pixel */ \ + \ + masked16_mmx_long_2: \ + shrl $16, %eax; \ + shrl $16, %edx; \ + cmpl %eax, %edx; \ + je masked16_mmx_qword; \ + movw %ax, -2(%edi); \ + \ + _align_; \ + masked16_mmx_qword: \ + shrl $1, %ecx; /* 4 pixels left */ \ + jnc masked16_mmx_loop_end; \ + \ + movq %es:(%esi), %mm1; /* Read 4 more pixels */ \ + movq %mm0, %mm3; \ + \ + pcmpeqw %mm1, %mm3; /* Compare with mask (%mm3, %mm6) */ \ + pxor %mm4, %mm3; /* Turn 1->0 and 0->1 */ \ + maskmovq %mm3, %mm1; /* Write if not equal to mask. */ \ + \ + _align_; \ + masked16_mmx_loop_end: \ + \ + pushl %ds; /* Swap back ES and DS */ \ popl %es; - ) - + BLIT_LOOP(masked16_mmx_loop, 2, BLIT_CODE) + #undef BLIT_CODE + emms jmp masked16_end; @@ -673,42 +678,42 @@ _align_ masked16_no_mmx: - BLIT_LOOP(masked32, 2, - - movl V_MASK, %edx ; - - test $1, %ecx ; /* 32 bit aligned->use new code */ - jz masked32_blit_x_loop ; - movw (%esi), %ax ; /* read a pixel */ - cmpw %ax, %dx ; /* test it */ - je masked16_blit_skip ; - movw %ax, %es:(%edi) ; /* write the pixel */ - masked16_blit_skip: - decl %ecx ; - jng masked32_blit_end ; - addl $2, %esi ; - addl $2, %edi ; - - _align_ ; - masked32_blit_x_loop: - movl (%esi), %eax ; /* read two pixels */ - addl $4, %esi ; - cmpl %eax, %edx ; /* test it */ - je masked32_blit_skip2 ; - cmpw %ax, %dx ; /* test it */ - je masked32_blit_skip1 ; - movw %ax, %es:(%edi) ; /* write the pixel */ - masked32_blit_skip1: - shrl $16, %eax ; - cmpw %ax, %dx ; /* test it */ - je masked32_blit_skip2 ; - movw %ax, %es:2(%edi) ; /* write the pixel */ - masked32_blit_skip2: - addl $4, %edi ; - subl $2, %ecx ; - jg masked32_blit_x_loop ; + #define BLIT_CODE \ + movl V_MASK, %edx ; \ + \ + test $1, %ecx ; /* 32 bit aligned->use new code */ \ + jz masked32_blit_x_loop ; \ + movw (%esi), %ax ; /* read a pixel */ \ + cmpw %ax, %dx ; /* test it */ \ + je masked16_blit_skip ; \ + movw %ax, %es:(%edi) ; /* write the pixel */ \ + masked16_blit_skip: \ + decl %ecx ; \ + jng masked32_blit_end ; \ + addl $2, %esi ; \ + addl $2, %edi ; \ + \ + _align_ ; \ + masked32_blit_x_loop: \ + movl (%esi), %eax ; /* read two pixels */ \ + addl $4, %esi ; \ + cmpl %eax, %edx ; /* test it */ \ + je masked32_blit_skip2 ; \ + cmpw %ax, %dx ; /* test it */ \ + je masked32_blit_skip1 ; \ + movw %ax, %es:(%edi) ; /* write the pixel */ \ + masked32_blit_skip1: \ + shrl $16, %eax ; \ + cmpw %ax, %dx ; /* test it */ \ + je masked32_blit_skip2 ; \ + movw %ax, %es:2(%edi) ; /* write the pixel */ \ + masked32_blit_skip2: \ + addl $4, %edi ; \ + subl $2, %ecx ; \ + jg masked32_blit_x_loop ; \ masked32_blit_end: - ) + BLIT_LOOP(masked32, 2, BLIT_CODE) + #undef BLIT_CODE masked16_end: Index: iblit32.s =================================================================== RCS file: /cvsroot/alleg/allegro/src/i386/iblit32.s,v retrieving revision 1.10 retrieving revision 1.11 diff -u -d -r1.10 -r1.11 --- iblit32.s 12 Jul 2002 22:51:15 -0000 1.10 +++ iblit32.s 16 Jul 2003 19:43:28 -0000 1.11 @@ -226,72 +226,73 @@ pcmpeqd %mm4, %mm4 /* Create inverter mask */ - BLIT_LOOP(masked32_mmx_loop, 4, - movd %ecx, %mm2; /* Save line length (%mm2) */ - shrl $2, %ecx; - - pushl %es; /* Swap ES and DS */ - pushl %ds; - popl %es; - popl %ds; - - _align_; - masked32_mmx_x_loop: - - movq %es:(%esi), %mm1; /* Read 4 pixels */ - movq %mm0, %mm3; - movq %es:8(%esi), %mm5; /* Read 4 more pixels */ - movq %mm0, %mm6; - - pcmpeqd %mm1, %mm3; /* Compare with mask (%mm3/%mm6) */ - pcmpeqd %mm5, %mm6; - pxor %mm4, %mm3; /* Turn 1->0 and 0->1 */ - pxor %mm4, %mm6; - addl $16, %esi; /* Update src */ - maskmovq %mm3, %mm1; /* Write if not equal to mask. Note: maskmovq is an SSE instruction! */ - addl $8, %edi; - maskmovq %mm6, %mm5; - - addl $8, %edi; /* Update dest */ - - decl %ecx; /* Any pixel packs left for this line? */ - jnz masked32_mmx_x_loop; - - - movd %mm2, %ecx; /* Restore pixel count */ - movd %mm0, %edx; - andl $3, %ecx; - jz masked32_mmx_loop_end; /* Nothing else to do? */ - shrl $1, %ecx; /* 1 pixels left */ - jnc masked32_mmx_qword; - - movl %es:(%esi), %eax; /* Read 1 pixel */ - addl $4, %esi; - addl $4, %edi; - cmpl %eax, %edx; /* Compare with mask */ - je masked32_mmx_qword; - movl %eax, -4(%edi); /* Write the pixel */ - - _align_; - masked32_mmx_qword: - - shrl $1, %ecx; /* 2 pixels left */ - jnc masked32_mmx_loop_end; - - movq %es:(%esi), %mm1; /* Read 2 more pixels */ - movq %mm0, %mm3; - - pcmpeqd %mm1, %mm3; /* Compare with mask (%mm3, %mm6) */ - pxor %mm4, %mm3; /* Turn 1->0 and 0->1 */ - maskmovq %mm3, %mm1; /* Write if not equal to mask. Note: maskmovq is an SSE instruction! */ - - _align_; - masked32_mmx_loop_end: + /* ??? maskmovq is an SSE instruction! */ - pushl %ds; /* Swap back ES and DS */ + #define BLIT_CODE \ + movd %ecx, %mm2; /* Save line length (%mm2) */ \ + shrl $2, %ecx; \ + \ + pushl %es; /* Swap ES and DS */ \ + pushl %ds; \ + popl %es; \ + popl %ds; \ + \ + _align_; \ + masked32_mmx_x_loop: \ + \ + movq %es:(%esi), %mm1; /* Read 4 pixels */ \ + movq %mm0, %mm3; \ + movq %es:8(%esi), %mm5; /* Read 4 more pixels */ \ + movq %mm0, %mm6; \ + \ + pcmpeqd %mm1, %mm3; /* Compare with mask (%mm3/%mm6) */ \ + pcmpeqd %mm5, %mm6; \ + pxor %mm4, %mm3; /* Turn 1->0 and 0->1 */ \ + pxor %mm4, %mm6; \ + addl $16, %esi; /* Update src */ \ + maskmovq %mm3, %mm1; /* Write if not equal to mask. */ \ + addl $8, %edi; \ + maskmovq %mm6, %mm5; \ + \ + addl $8, %edi; /* Update dest */ \ + \ + decl %ecx; /* Any pixel packs left for this line? */ \ + jnz masked32_mmx_x_loop; \ + \ + movd %mm2, %ecx; /* Restore pixel count */ \ + movd %mm0, %edx; \ + andl $3, %ecx; \ + jz masked32_mmx_loop_end; /* Nothing else to do? */ \ + shrl $1, %ecx; /* 1 pixels left */ \ + jnc masked32_mmx_qword; \ + \ + movl %es:(%esi), %eax; /* Read 1 pixel */ \ + addl $4, %esi; \ + addl $4, %edi; \ + cmpl %eax, %edx; /* Compare with mask */ \ + je masked32_mmx_qword; \ + movl %eax, -4(%edi); /* Write the pixel */ \ + \ + _align_; \ + masked32_mmx_qword: \ + \ + shrl $1, %ecx; /* 2 pixels left */ \ + jnc masked32_mmx_loop_end; \ + \ + movq %es:(%esi), %mm1; /* Read 2 more pixels */ \ + movq %mm0, %mm3; \ + \ + pcmpeqd %mm1, %mm3; /* Compare with mask (%mm3, %mm6) */ \ + pxor %mm4, %mm3; /* Turn 1->0 and 0->1 */ \ + maskmovq %mm3, %mm1; /* Write if not equal to mask. */ \ + \ + _align_; \ + masked32_mmx_loop_end: \ + \ + pushl %ds; /* Swap back ES and DS */ \ popl %es; - ) - + BLIT_LOOP(masked32_mmx_loop, 4, BLIT_CODE) + #undef BLIT_CODE emms jmp masked32_end; @@ -303,30 +304,30 @@ _align_ - BLIT_LOOP(masked, 4, - - _align_ ; - masked_blit_x_loop: - movl (%esi), %eax ; /* read a byte */ - addl $4, %esi ; - - cmpl $MASK_COLOR_32, %eax ;/* test it */ - je masked_blit_skip ; - - movl %eax, %es:(%edi) ; /* write the pixel */ - addl $4, %edi ; - decl %ecx ; - jg masked_blit_x_loop ; - jmp masked_blit_x_loop_done ; - - _align_ ; - masked_blit_skip: - addl $4, %edi ; /* skip zero pixels */ - decl %ecx ; - jg masked_blit_x_loop ; - + #define BLIT_CODE \ + _align_ ; \ + masked_blit_x_loop: \ + movl (%esi), %eax ; /* read a byte */ \ + addl $4, %esi ; \ + \ + cmpl $MASK_COLOR_32, %eax ;/* test it */ \ + je masked_blit_skip ; \ + \ + movl %eax, %es:(%edi) ; /* write the pixel */ \ + addl $4, %edi ; \ + decl %ecx ; \ + jg masked_blit_x_loop ; \ + jmp masked_blit_x_loop_done ; \ + \ + _align_ ; \ + masked_blit_skip: \ + addl $4, %edi ; /* skip zero pixels */ \ + decl %ecx ; \ + jg masked_blit_x_loop ; \ + \ masked_blit_x_loop_done: - ) + BLIT_LOOP(masked, 4, BLIT_CODE) + #undef BLIT_CODE masked32_end: Index: iblit8.s =================================================================== RCS file: /cvsroot/alleg/allegro/src/i386/iblit8.s,v retrieving revision 1.12 retrieving revision 1.13 diff -u -d -r1.12 -r1.13 --- iblit8.s 12 Jul 2002 22:51:15 -0000 1.12 +++ iblit8.s 16 Jul 2003 19:43:28 -0000 1.13 @@ -537,105 +537,107 @@ pcmpeqd %mm4, %mm4 /* Create inverter mask */ - BLIT_LOOP(masked8_mmx_loop, 1, - movd %ecx, %mm2; /* Save line length (%mm2) */ - shrl $4, %ecx; - - pushl %es; /* Swap ES and DS */ - pushl %ds; - popl %es; - popl %ds; - - _align_; - masked8_mmx_x_loop: - - movq %es:(%esi), %mm1; /* Read 8 pixels */ - movq %mm0, %mm3; - movq %es:8(%esi), %mm5; /* Read 8 more pixels */ - movq %mm0, %mm6; - - pcmpeqb %mm1, %mm3; /* Compare with mask (%mm3/%mm6) */ - pcmpeqb %mm5, %mm6; - pxor %mm4, %mm3; /* Turn 1->0 and 0->1 */ - pxor %mm4, %mm6; - addl $16, %esi; /* Update src */ - maskmovq %mm3, %mm1; /* Write if not equal to mask. Note: maskmovq is an SSE instruction! */ - addl $8, %edi; - maskmovq %mm6, %mm5; - - addl $8, %edi; /* Update dest */ - - decl %ecx; /* Any pixel packs left for this line? */ - jnz masked8_mmx_x_loop; - - - movd %mm2, %ecx; /* Restore pixel count */ - andl $15, %ecx; - jz masked8_mmx_loop_end; /* Nothing else to do? */ - shrl $1, %ecx; /* 1 pixels left */ - jnc masked8_mmx_word; - - movb %es:(%esi), %al; /* Read 1 pixel */ - incl %esi; - incl %edi; - orb %al, %al; /* Compare with mask */ - jz masked8_mmx_word; - movb %al, -1(%edi); /* Write the pixel */ - - masked8_mmx_word: - shrl $1, %ecx; /* 2 pixels left */ - jnc masked8_mmx_long; - - movb %es:(%esi), %al; /* Read 2 pixels */ - movb %es:1(%esi), %ah; - addl $2, %esi; - addl $2, %edi; - orb %al, %al; - jz masked8_mmx_word_2; - movb %al, -2(%edi); /* Write pixel */ - - masked8_mmx_word_2: - orb %ah, %ah; - jz masked8_mmx_long; - movb %ah, -1(%edi); /* Write other pixel */ - - _align_; - masked8_mmx_long: - - shrl $1, %ecx; /* 4 pixels left */ - jnc masked8_mmx_qword; - - movl %es:(%esi), %eax; /* Read 4 pixels */ - addl $4, %esi; - movd %eax, %mm1; - movl $-1, %eax; - movq %mm0, %mm3; - movd %eax, %mm5; /* Build XOR flag */ - - pcmpeqb %mm1, %mm3; /* Compare with mask (%mm3/%mm6) */ - pxor %mm5, %mm3; /* Turn 1->0 and 0->1 */ - pand %mm5, %mm3; /* Make sure only the bottom 32 bits are used */ - maskmovq %mm3, %mm1; /* Write if not equal to mask. Note: maskmovq is an SSE instruction! */ - addl $4, %edi; - - _align_; - masked8_mmx_qword: - shrl $1, %ecx; /* 8 pixels left */ - jnc masked8_mmx_loop_end; - - movq %es:(%esi), %mm1; /* Read 8 more pixels */ - movq %mm0, %mm3; - - pcmpeqw %mm1, %mm3; /* Compare with mask (%mm3) */ - pxor %mm4, %mm3; /* Turn 1->0 and 0->1 */ - maskmovq %mm3, %mm1; /* Write if not equal to mask. Note: maskmovq is an SSE instruction! */ - - _align_; - masked8_mmx_loop_end: + /* Note: maskmovq is an SSE instruction! */ - pushl %ds; /* Swap back ES and DS */ + #define BLIT_CODE \ + movd %ecx, %mm2; /* Save line length (%mm2) */ \ + shrl $4, %ecx; \ + \ + pushl %es; /* Swap ES and DS */ \ + pushl %ds; \ + popl %es; \ + popl %ds; \ + \ + _align_; \ + masked8_mmx_x_loop: \ + \ + movq %es:(%esi), %mm1; /* Read 8 pixels */ \ + movq %mm0, %mm3; \ + movq %es:8(%esi), %mm5; /* Read 8 more pixels */ \ + movq %mm0, %mm6; \ + \ + pcmpeqb %mm1, %mm3; /* Compare with mask (%mm3/%mm6) */ \ + pcmpeqb %mm5, %mm6; \ + pxor %mm4, %mm3; /* Turn 1->0 and 0->1 */ \ + pxor %mm4, %mm6; \ + addl $16, %esi; /* Update src */ \ + maskmovq %mm3, %mm1; /* Write if not equal to mask. */ \ + addl $8, %edi; \ + maskmovq %mm6, %mm5; \ + \ + addl $8, %edi; /* Update dest */ \ + \ + decl %ecx; /* Any pixel packs left for this line? */ \ + jnz masked8_mmx_x_loop; \ + \ + movd %mm2, %ecx; /* Restore pixel count */ \ + andl $15, %ecx; \ + jz masked8_mmx_loop_end; /* Nothing else to do? */ \ + shrl $1, %ecx; /* 1 pixels left */ \ + jnc masked8_mmx_word; \ + \ + movb %es:(%esi), %al; /* Read 1 pixel */ \ + incl %esi; \ + incl %edi; \ + orb %al, %al; /* Compare with mask */ \ + jz masked8_mmx_word; \ + movb %al, -1(%edi); /* Write the pixel */ \ + \ + masked8_mmx_word: \ + shrl $1, %ecx; /* 2 pixels left */ \ + jnc masked8_mmx_long; \ + \ + movb %es:(%esi), %al; /* Read 2 pixels */ \ + movb %es:1(%esi), %ah; \ + addl $2, %esi; \ + addl $2, %edi; \ + orb %al, %al; \ + jz masked8_mmx_word_2; \ + movb %al, -2(%edi); /* Write pixel */ \ + \ + masked8_mmx_word_2: \ + orb %ah, %ah; \ + jz masked8_mmx_long; \ + movb %ah, -1(%edi); /* Write other pixel */ \ + \ + _align_; \ + masked8_mmx_long: \ + \ + shrl $1, %ecx; /* 4 pixels left */ \ + jnc masked8_mmx_qword; \ + \ + movl %es:(%esi), %eax; /* Read 4 pixels */ \ + addl $4, %esi; \ + movd %eax, %mm1; \ + movl $-1, %eax; \ + movq %mm0, %mm3; \ + movd %eax, %mm5; /* Build XOR flag */ \ + \ + pcmpeqb %mm1, %mm3; /* Compare with mask (%mm3/%mm6) */ \ + pxor %mm5, %mm3; /* Turn 1->0 and 0->1 */ \ + pand %mm5, %mm3; /* Make sure only the bottom 32 bits are used */ \ + maskmovq %mm3, %mm1; /* Write if not equal to mask. */ \ + addl $4, %edi; \ + \ + _align_; \ + masked8_mmx_qword: \ + shrl $1, %ecx; /* 8 pixels left */ \ + jnc masked8_mmx_loop_end; \ + \ + movq %es:(%esi), %mm1; /* Read 8 more pixels */ \ + movq %mm0, %mm3; \ + \ + pcmpeqw %mm1, %mm3; /* Compare with mask (%mm3) */ \ + pxor %mm4, %mm3; /* Turn 1->0 and 0->1 */ \ + maskmovq %mm3, %mm1; /* Write if not equal to mask. */ \ + \ + _align_; \ + masked8_mmx_loop_end: \ + \ + pushl %ds; /* Swap back ES and DS */ \ popl %es; - ) + BLIT_LOOP(masked8_mmx_loop, 1, BLIT_CODE) + #undef BLIT_CODE emms @@ -647,114 +649,114 @@ masked8_no_mmx: - BLIT_LOOP(masked16, 1, - - test $1, %ecx ; /* 16 bit aligned->use new code */ - jz masked16_blit ; /* width 16 bit aligned */ - movb (%esi), %al ; /* read a byte */ - incl %esi ; - orb %al, %al ; /* test it */ - jz masked8_skip ; - movb %al, %es:(%edi) ; /* write the pixel */ - masked8_skip: - incl %edi ; - decl %ecx ; - jng masked16_blit_end ; - - masked16_blit: - test $3, %ecx ; /* 32 bit aligned->use new code */ - jz masked16_blit_x_loop ; /* width 32 bit aligned */ - movw (%esi), %ax ; /* read two pixels */ - orw %ax, %ax ; - jz masked16_blit_end2 ; - orb %al,%al ; - jz masked16_blit_wskip1 ; - orb %ah,%ah ; - jz masked16_blit_p1wskip2 ; - movw %ax, %es:(%edi) ; /* write the pixel */ - jmp masked16_blit_end2 ; - _align_ ; - masked16_blit_p1wskip2: - movb %al, %es:(%edi) ; /* write the pixel */ - jmp masked16_blit_end2 ; - _align_ ; - masked16_blit_wskip1: - movb %ah, %es:1(%edi) ; /* write the pixel */ - _align_ ; - masked16_blit_end2: - subl $2, %ecx ; - jng masked16_blit_end ; - addl $2, %esi ; - addl $2, %edi ; - - _align_ ; - masked16_blit_x_loop: - movl (%esi), %eax ; /* read four pixels */ - addl $4, %esi ; - movl %eax, %edx ; - shrl $16,%edx ; - orl %eax, %eax ; - jz masked16_blit_skip4 ; - orw %ax, %ax ; - jz masked16_blit_skip2 ; - orb %al,%al ; - jz masked16_blit_skip1 ; - orb %ah, %ah ; - jz masked16_put1skip2 ; - orb %dl,%dl ; - jz masked16_put12_skip3 ; - orb %dh,%dh ; - jz masked16_put123_skip4 ; - movl %eax, %es:(%edi) ; /* write the pixel */ - jmp masked16_blit_skip4 ; - - _align_ ; - masked16_put1skip2: - movb %al, %es:(%edi) ; /* write the pixel */ - jmp masked16_blit_skip2 ; - _align_ ; - masked16_put12_skip3: - movw %ax, %es:(%edi) ; /* write the pixel */ - orb %dh, %dh ; - jnz masked16_blit_skip3 ; - jmp masked16_blit_skip4 ; - _align_ ; - masked16_put123_skip4: - movw %ax, %es:(%edi) ; /* write the pixel */ - movb %dl, %es:2(%edi) ; /* write the pixel */ - addl $4, %edi ; - subl $4, %ecx ; - jg masked16_blit_x_loop ; - jmp masked16_blit_end ; - - masked16_blit_skip1: - movb %ah, %es:1(%edi) ; /* write the pixel */ - masked16_blit_skip2: - orw %dx, %dx ; - jz masked16_blit_skip4 ; - orb %dl,%dl ; - jz masked16_blit_skip3 ; - orb %dh, %dh ; - jz masked16_put3skip4 ; - movw %dx, %es:2(%edi) ; /* write the pixel */ - jmp masked16_blit_skip4 ; - - _align_ ; - masked16_put3skip4: - movb %dl, %es:2(%edi) ; /* write the pixel */ - addl $4, %edi ; - subl $4, %ecx ; - jg masked16_blit_x_loop ; - jmp masked16_blit_end ; - - masked16_blit_skip3: - movb %dh, %es:3(%edi) ; /* write the pixel */ - masked16_blit_skip4: - addl $4, %edi ; - subl $4, %ecx ; - jg masked16_blit_x_loop ; + #define BLIT_CODE \ + test $1, %ecx ; /* 16 bit aligned->use new code */ \ + jz masked16_blit ; /* width 16 bit aligned */ \ + movb (%esi), %al ; /* read a byte */ \ + incl %esi ; \ + orb %al, %al ; /* test it */ \ + jz masked8_skip ; \ + movb %al, %es:(%edi) ; /* write the pixel */ \ + masked8_skip: \ + incl %edi ; \ + decl %ecx ; \ + jng masked16_blit_end ; \ + \ + masked16_blit: \ + test $3, %ecx ; /* 32 bit aligned->use new code */ \ + jz masked16_blit_x_loop ; /* width 32 bit aligned */ \ + movw (%esi), %ax ; /* read two pixels */ \ + orw %ax, %ax ; \ + jz masked16_blit_end2 ; \ + orb %al,%al ; \ + jz masked16_blit_wskip1 ; \ + orb %ah,%ah ; \ + jz masked16_blit_p1wskip2 ; \ + movw %ax, %es:(%edi) ; /* write the pixel */ \ + jmp masked16_blit_end2 ; \ + _align_ ; \ + masked16_blit_p1wskip2: \ + movb %al, %es:(%edi) ; /* write the pixel */ \ + jmp masked16_blit_end2 ; \ + _align_ ; \ + masked16_blit_wskip1: \ + movb %ah, %es:1(%edi) ; /* write the pixel */ \ + _align_ ; \ + masked16_blit_end2: \ + subl $2, %ecx ; \ + jng masked16_blit_end ; \ + addl $2, %esi ; \ + addl $2, %edi ; \ + \ + _align_ ; \ + masked16_blit_x_loop: \ + movl (%esi), %eax ; /* read four pixels */ \ + addl $4, %esi ; \ + movl %eax, %edx ; \ + shrl $16,%edx ; \ + orl %eax, %eax ; \ + jz masked16_blit_skip4 ; \ + orw %ax, %ax ; \ + jz masked16_blit_skip2 ; \ + orb %al,%al ; \ + jz masked16_blit_skip1 ; \ + orb %ah, %ah ; \ + jz masked16_put1skip2 ; \ + orb %dl,%dl ; \ + jz masked16_put12_skip3 ; \ + orb %dh,%dh ; \ + jz masked16_put123_skip4 ; \ + movl %eax, %es:(%edi) ; /* write the pixel */ \ + jmp masked16_blit_skip4 ; \ + \ + _align_ ; \ + masked16_put1skip2: \ + movb %al, %es:(%edi) ; /* write the pixel */ \ + jmp masked16_blit_skip2 ; \ + _align_ ; \ + masked16_put12_skip3: \ + movw %ax, %es:(%edi) ; /* write the pixel */ \ + orb %dh, %dh ; \ + jnz masked16_blit_skip3 ; \ + jmp masked16_blit_skip4 ; \ + _align_ ; \ + masked16_put123_skip4: \ + movw %ax, %es:(%edi) ; /* write the pixel */ \ + movb %dl, %es:2(%edi) ; /* write the pixel */ \ + addl $4, %edi ; \ + subl $4, %ecx ; \ + jg masked16_blit_x_loop ; \ + jmp masked16_blit_end ; \ + \ + masked16_blit_skip1: \ + movb %ah, %es:1(%edi) ; /* write the pixel */ \ + masked16_blit_skip2: \ + orw %dx, %dx ; \ + jz masked16_blit_skip4 ; \ + orb %dl,%dl ; \ + jz masked16_blit_skip3 ; \ + orb %dh, %dh ; \ + jz masked16_put3skip4 ; \ + movw %dx, %es:2(%edi) ; /* write the pixel */ \ + jmp masked16_blit_skip4 ; \ + \ + _align_ ; \ + masked16_put3skip4: \ + movb %dl, %es:2(%edi) ; /* write the pixel */ \ + addl $4, %edi ; \ + subl $4, %ecx ; \ + jg masked16_blit_x_loop ; \ + jmp masked16_blit_end ; \ + \ + masked16_blit_skip3: \ + movb %dh, %es:3(%edi) ; /* write the pixel */ \ + masked16_blit_skip4: \ + addl $4, %edi ; \ + subl $4, %ecx ; \ + jg masked16_blit_x_loop ; \ masked16_blit_end: - ) + BLIT_LOOP(masked16, 1, BLIT_CODE) + #undef BLIT_CODE masked8_end: Index: iscan.s =================================================================== RCS file: /cvsroot/alleg/allegro/src/i386/iscan.s,v retrieving revision 1.7 retrieving revision 1.8 diff -u -d -r1.7 -r1.8 --- iscan.s 15 Jul 2003 07:54:58 -0000 1.7 +++ iscan.s 16 Jul 2003 19:43:28 -0000 1.8 @@ -158,7 +158,7 @@ #define TMP -32(%ebp) /* helper for setting up an affine texture mapping operation */ -#define INIT_ATEX(extra...) \ +#define INIT_ATEX(extra) \ pushl %ebp ;\ movl %esp, %ebp ;\ subl $32, %esp ;\ @@ -210,7 +210,7 @@ * Fills an affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex8) - INIT_ATEX() + INIT_ATEX(/**/) addl DVL, %ebx adcl DVH, %edx @@ -251,7 +251,7 @@ * Fills a masked affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_mask8) - INIT_ATEX() + INIT_ATEX(/**/) addl DVL, %ebx adcl DVH, %edx @@ -292,18 +292,19 @@ * Fills a lit affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_lit8) - INIT_ATEX( - movl POLYSEG_C(%esi), %eax ; - movl POLYSEG_DC(%esi), %ebx ; - sarl $8, %eax ; - sarl $8, %ebx ; - jge atex_lit_round ; - incl %ebx ; - atex_lit_round: - subl %ebx, %eax ; - movl %ebx, DC ; + #define INIT_CODE \ + movl POLYSEG_C(%esi), %eax ; \ + movl POLYSEG_DC(%esi), %ebx ; \ + sarl $8, %eax ; \ + sarl $8, %ebx ; \ + jge atex_lit_round ; \ + incl %ebx ; \ + atex_lit_round: \ + subl %ebx, %eax ; \ + movl %ebx, DC ; \ movl %eax, C - ) + INIT_ATEX(INIT_CODE) + #undef INIT_CODE movl %eax, TMP addl DVL, %ebx @@ -364,18 +365,19 @@ * Fills a masked lit affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_mask_lit8) - INIT_ATEX( - movl POLYSEG_C(%esi), %eax ; - movl POLYSEG_DC(%esi), %ebx ; - sarl $8, %eax ; - sarl $8, %ebx ; - jge atex_mask_lit_round ; - incl %ebx ; - atex_mask_lit_round: - subl %ebx, %eax ; - movl %ebx, DC ; + #define INIT_CODE \ + movl POLYSEG_C(%esi), %eax ; \ + movl POLYSEG_DC(%esi), %ebx ; \ + sarl $8, %eax ; \ + sarl $8, %ebx ; \ + jge atex_mask_lit_round ; \ + incl %ebx ; \ + atex_mask_lit_round: \ + subl %ebx, %eax ; \ + movl %ebx, DC ; \ movl %eax, C - ) + INIT_ATEX(INIT_CODE) + #undef INIT_CODE movl %eax, TMP addl DVL, %ebx @@ -1124,7 +1126,7 @@ #define READ_ADDR -48(%ebp) /* first part of an affine texture mapping operation */ -#define INIT_ATEX(extra...) \ +#define INIT_ATEX(extra) \ pushl %ebp ; \ movl %esp, %ebp ; \ subl $48, %esp /* local variables */ ; \ @@ -1186,7 +1188,7 @@ * Fills an affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex16) - INIT_ATEX() + INIT_ATEX(/**/) movw (%esi, %eax, 2), %ax /* read texel */ movw %ax, FSEG(%edi) /* write the pixel */ addl $2, %edi @@ -1197,7 +1199,7 @@ * Fills a masked affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_mask15) - INIT_ATEX() + INIT_ATEX(/**/) movw (%esi, %eax, 2), %ax /* read texel */ cmpw $MASK_COLOR_15, %ax jz 7f @@ -1210,7 +1212,7 @@ * Fills a masked affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_mask16) - INIT_ATEX() + INIT_ATEX(/**/) movw (%esi, %eax, 2), %ax /* read texel */ cmpw $MASK_COLOR_16, %ax jz 7f @@ -1227,7 +1229,7 @@ * Fills an affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex32) - INIT_ATEX() + INIT_ATEX(/**/) movl (%esi, %eax, 4), %eax /* read texel */ movl %eax, FSEG(%edi) /* write the pixel */ addl $4, %edi @@ -1238,7 +1240,7 @@ * Fills a masked affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_mask32) - INIT_ATEX() + INIT_ATEX(/**/) movl (%esi, %eax, 4), %eax /* read texel */ cmpl $MASK_COLOR_32, %eax jz 7f @@ -1253,7 +1255,7 @@ * Fills an affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex24) - INIT_ATEX() + INIT_ATEX(/**/) leal (%eax, %eax, 2), %ecx movw (%esi, %ecx), %ax /* read texel */ movw %ax, FSEG(%edi) /* write the pixel */ @@ -1267,7 +1269,7 @@ * Fills a masked affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_mask24) - INIT_ATEX() + INIT_ATEX(/**/) leal (%eax, %eax, 2), %ecx movzbl 2(%esi, %ecx), %eax /* read texel */ shll $16, %eax @@ -1289,12 +1291,13 @@ * Fills a lit affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_lit15) - INIT_ATEX( - movl POLYSEG_C(%esi), %eax ; - movl POLYSEG_DC(%esi), %edx ; - movl %eax, ALPHA ; + #define INIT_CODE \ + movl POLYSEG_C(%esi), %eax ; \ + movl POLYSEG_DC(%esi), %edx ; \ + movl %eax, ALPHA ; \ movl %edx, DALPHA - ) + INIT_ATEX(INIT_CODE) + #undef INIT_CODE pushl %edx movzbl 2+ALPHA, %edx pushl %edx @@ -1316,12 +1319,13 @@ * Fills a lit affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_mask_lit15) - INIT_ATEX( - movl POLYSEG_C(%esi), %eax ; - movl POLYSEG_DC(%esi), %edx ; - movl %eax, ALPHA ; + #define INIT_CODE \ + movl POLYSEG_C(%esi), %eax ; \ + movl POLYSEG_DC(%esi), %edx ; \ + movl %eax, ALPHA ; \ movl %edx, DALPHA - ) + INIT_ATEX(INIT_CODE) + #undef INIT_CODE movw (%esi, %eax, 2), %ax /* read texel */ cmpw $MASK_COLOR_15, %ax jz 7f @@ -1348,12 +1352,13 @@ * Fills a lit affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_lit16) - INIT_ATEX( - movl POLYSEG_C(%esi), %eax ; - movl POLYSEG_DC(%esi), %edx ; - movl %eax, ALPHA ; + #define INIT_CODE \ + movl POLYSEG_C(%esi), %eax ; \ + movl POLYSEG_DC(%esi), %edx ; \ + movl %eax, ALPHA ; \ movl %edx, DALPHA - ) + INIT_ATEX(INIT_CODE) + #undef INIT_CODE pushl %edx movzbl 2+ALPHA, %edx pushl %edx @@ -1375,12 +1380,13 @@ * Fills a lit affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_mask_lit16) - INIT_ATEX( - movl POLYSEG_C(%esi), %eax ; - movl POLYSEG_DC(%esi), %edx ; - movl %eax, ALPHA ; + #define INIT_CODE \ + movl POLYSEG_C(%esi), %eax ; \ + movl POLYSEG_DC(%esi), %edx ; \ + movl %eax, ALPHA ; \ movl %edx, DALPHA - ) + INIT_ATEX(INIT_CODE) + #undef INIT_CODE movw (%esi, %eax, 2), %ax /* read texel */ cmpw $MASK_COLOR_16, %ax jz 7f @@ -1410,12 +1416,13 @@ * Fills a lit affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_lit32) - INIT_ATEX( - movl POLYSEG_C(%esi), %eax ; - movl POLYSEG_DC(%esi), %edx ; - movl %eax, ALPHA ; + #define INIT_CODE \ + movl POLYSEG_C(%esi), %eax ; \ + movl POLYSEG_DC(%esi), %edx ; \ + movl %eax, ALPHA ; \ movl %edx, DALPHA - ) + INIT_ATEX(INIT_CODE) + #undef INIT_CODE pushl %edx movzbl 2+ALPHA, %edx pushl %edx @@ -1437,12 +1444,13 @@ * Fills a lit affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_mask_lit32) - INIT_ATEX( - movl POLYSEG_C(%esi), %eax ; - movl POLYSEG_DC(%esi), %edx ; - movl %eax, ALPHA ; + #define INIT_CODE \ + movl POLYSEG_C(%esi), %eax ; \ + movl POLYSEG_DC(%esi), %edx ; \ + movl %eax, ALPHA ; \ movl %edx, DALPHA - ) + INIT_ATEX(INIT_CODE) + #undef INIT_CODE movl (%esi, %eax, 4), %eax /* read texel */ cmpl $MASK_COLOR_32, %eax jz 7f @@ -1472,12 +1480,13 @@ * Fills a lit affine texture mapped polygon scanline. */ FUNC(_poly_scanline_atex_lit24) - INIT_ATEX( - movl POLYSEG_C(%esi), %eax ; - movl POLYSEG_DC(%esi), %edx ; - movl %eax, ALPHA ; + #define INIT_CODE \ + movl POLYSEG_C(%esi), %eax ; \ + movl POLYSEG_DC(%esi), %edx ; \ + movl %eax, ALPHA ; \ movl %edx, DALPHA - ) + INIT_ATEX(INIT_CODE) + #undef INIT_CODE pushl %edx movzbl 2+ALPHA, %edx pushl %edx @@ -1504,12 +1513,13 @@ * Fills a lit affine texture mapped polygon scanline. */ FUNC... [truncated message content] |