|
From: <CW...@us...> - 2012-03-25 18:29:43
|
Revision: 1515
http://graphics32.svn.sourceforge.net/graphics32/?rev=1515&view=rev
Author: CWBudde
Date: 2012-03-25 18:29:35 +0000 (Sun, 25 Mar 2012)
Log Message:
-----------
* SSE2 code optimization review (added missing implementations and changed some implementation orders)
Modified Paths:
--------------
trunk/Source/GR32_Blend.pas
trunk/Source/GR32_LowLevel.pas
Modified: trunk/Source/GR32_Blend.pas
===================================================================
--- trunk/Source/GR32_Blend.pas 2012-03-25 10:05:08 UTC (rev 1514)
+++ trunk/Source/GR32_Blend.pas 2012-03-25 18:29:35 UTC (rev 1515)
@@ -775,6 +775,68 @@
procedure BlendMem_ASM(F: TColor32; var B: TColor32);
asm
+{$IFDEF TARGET_x86}
+ // Blend foreground F into the pixel stored at B, weighted by F's own alpha.
+ // EAX <- F
+ // [EDX] <- B
+
+ // Test Fa = 0 ?
+ TEST EAX,$FF000000 // Fa = 0 ? => do not write
+ JZ @2
+
+ // Get weight W = Fa (this variant takes no master alpha)
+ MOV ECX,EAX // ECX <- Fa Fr Fg Fb
+ SHR ECX,24 // ECX <- 00 00 00 Fa
+
+ // Test Fa = 255 ?
+ CMP ECX,$FF
+ JZ @1 // fully opaque => store F unchanged
+
+ PUSH EBX // EBX and ESI are used as scratch below and restored before RET
+ PUSH ESI
+
+ // P = W * F (red/blue and alpha/green are multiplied as two packed pairs)
+ MOV EBX,EAX // EBX <- Fa Fr Fg Fb
+ AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
+ AND EBX,$FF00FF00 // EBX <- Fa 00 Fg 00
+ IMUL EAX,ECX // EAX <- Pr ** Pb **
+ SHR EBX,8 // EBX <- 00 Fa 00 Fg
+ IMUL EBX,ECX // EBX <- Pa ** Pg **
+ ADD EAX,bias // bias is declared outside this block; presumably a rounding term - TODO confirm
+ AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
+ SHR EAX,8 // EAX <- 00 Pr 00 Pb
+ ADD EBX,bias
+ AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
+ OR EAX,EBX // EAX <- Pa Pr Pg Pb
+
+ MOV ESI,[EDX] // ESI <- Ba Br Bg Bb (current destination pixel)
+
+// W = 255 - W; Q = W * B
+ XOR ECX,$000000FF // ECX <- 255 - Fa (Fa is 1..254 on this path)
+ MOV EBX,ESI // EBX <- Ba Br Bg Bb
+ AND ESI,$00FF00FF // ESI <- 00 Br 00 Bb
+ AND EBX,$FF00FF00 // EBX <- Ba 00 Bg 00
+ IMUL ESI,ECX // ESI <- Qr ** Qb **
+ SHR EBX,8 // EBX <- 00 Ba 00 Bg
+ IMUL EBX,ECX // EBX <- Qa ** Qg **
+ ADD ESI,bias
+ AND ESI,$FF00FF00 // ESI <- Qr 00 Qb 00
+ SHR ESI,8 // ESI <- 00 Qr 00 Qb
+ ADD EBX,bias
+ AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
+ OR EBX,ESI // EBX <- Qa Qr Qg Qb
+
+ // Z = P + Q (assuming no overflow at each byte)
+ ADD EAX,EBX // EAX <- Za Zr Zg Zb
+
+ MOV [EDX],EAX // write the blended pixel back to B
+ POP ESI
+ POP EBX
+ RET
+
+@1: MOV [EDX],EAX // Fa = 255: B := F
+@2: RET
+{$ENDIF}
+
{$IFDEF TARGET_x64}
// ECX <- F
// [RDX] <- B
@@ -832,80 +894,76 @@
@1: MOV [RDX],EAX
@2: RET
{$ENDIF}
+end;
-{$IFDEF TARGET_x86}
+function BlendRegEx_ASM(F, B, M: TColor32): TColor32;
+asm
+ // blend foreground color (F) to a background color (B),
+ // using alpha channel value of F multiplied by master alpha (M)
+ // no checking for M = $FF, in this case Graphics32 uses BlendReg
+ // Result Z = Fa * M * Frgb + (1 - Fa * M) * Brgb
// EAX <- F
- // [EDX] <- B
+ // EDX <- B
+ // ECX <- M
- // Test Fa = 0 ?
- TEST EAX,$FF000000 // Fa = 0 ? => do not write
+{$IFDEF TARGET_x86}
+
+// Check Fa > 0 ?
+ TEST EAX,$FF000000 // Fa = 0? => Result := EDX
JZ @2
+ PUSH EBX
+
// Get weight W = Fa * M
- MOV ECX,EAX // ECX <- Fa Fr Fg Fb
- SHR ECX,24 // ECX <- 00 00 00 Fa
+ MOV EBX,EAX // EBX <- Fa Fr Fg Fb
+ INC ECX // 255:256 range bias
+ SHR EBX,24 // EBX <- 00 00 00 Fa
+ IMUL ECX,EBX // ECX <- 00 00 W **
+ SHR ECX,8 // ECX <- 00 00 00 W
+ JZ @1 // W = 0 ? => Result := EDX
- // Test Fa = 255 ?
- CMP ECX,$FF
- JZ @1
-
- PUSH EBX
- PUSH ESI
-
// P = W * F
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
+ MOV EBX,EAX // EBX <- ** Fr Fg Fb
AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND EBX,$FF00FF00 // EBX <- Fa 00 Fg 00
+ AND EBX,$0000FF00 // EBX <- 00 00 Fg 00
IMUL EAX,ECX // EAX <- Pr ** Pb **
- SHR EBX,8 // EBX <- 00 Fa 00 Fg
- IMUL EBX,ECX // EBX <- Pa ** Pg **
+ SHR EBX,8 // EBX <- 00 00 00 Fg
+ IMUL EBX,ECX // EBX <- 00 00 Pg **
ADD EAX,bias
AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
SHR EAX,8 // EAX <- 00 Pr ** Pb
ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
- OR EAX,EBX // EAX <- Pa Pr Pg Pb
+ AND EBX,$0000FF00 // EBX <- 00 00 Pg 00
+ OR EAX,EBX // EAX <- 00 Pr Pg Pb
- MOV ESI,[EDX]
-
-// W = 1 - W; Q = W * B
+ // W = 1 - W; Q = W * B
XOR ECX,$000000FF // ECX <- 1 - ECX
- MOV EBX,ESI // EBX <- Ba Br Bg Bb
- AND ESI,$00FF00FF // ESI <- 00 Br 00 Bb
- AND EBX,$FF00FF00 // EBX <- Ba 00 Bg 00
- IMUL ESI,ECX // ESI <- Qr ** Qb **
- SHR EBX,8 // EBX <- 00 Ba 00 Bg
- IMUL EBX,ECX // EBX <- Qa ** Qg **
- ADD ESI,bias
- AND ESI,$FF00FF00 // ESI <- Qr 00 Qb 00
- SHR ESI,8 // ESI <- 00 Qr ** Qb
+ MOV EBX,EDX // EBX <- 00 Br Bg Bb
+ AND EDX,$00FF00FF // EDX <- 00 Br 00 Bb
+ AND EBX,$0000FF00 // EBX <- 00 00 Bg 00
+ IMUL EDX,ECX // EDX <- Qr ** Qb **
+ SHR EBX,8 // EBX <- 00 00 00 Bg
+ IMUL EBX,ECX // EBX <- 00 00 Qg **
+ ADD EDX,bias
+ AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
+ SHR EDX,8 // EDX <- 00 Qr ** Qb
ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
- OR EBX,ESI // EBX <- Qa Qr Qg Qb
+ AND EBX,$0000FF00 // EBX <- 00 00 Qg 00
+ OR EBX,EDX // EBX <- 00 Qr Qg Qb
// Z = P + Q (assuming no overflow at each byte)
- ADD EAX,EBX // EAX <- Za Zr Zg Zb
+ ADD EAX,EBX // EAX <- 00 Zr Zg Zb
- MOV [EDX],EAX
- POP ESI
POP EBX
RET
-@1: MOV [EDX],EAX
-@2: RET
+@1:
+ POP EBX
+
+@2: MOV EAX,EDX
+ RET
{$ENDIF}
-end;
-function BlendRegEx_ASM(F, B, M: TColor32): TColor32;
-asm
- // blend foreground color (F) to a background color (B),
- // using alpha channel value of F multiplied by master alpha (M)
- // no checking for M = $FF, in this case Graphics32 uses BlendReg
- // Result Z = Fa * M * Frgb + (1 - Fa * M) * Brgb
- // EAX <- F
- // EDX <- B
- // ECX <- M
-
{$IFDEF TARGET_x64}
MOV EAX,ECX // EAX <- Fa Fr Fg Fb
TEST EAX,$FF000000 // Fa = 0? => Result := EDX
@@ -955,11 +1013,17 @@
@1: MOV EAX,EDX
RET
{$ENDIF}
+end;
+procedure BlendMemEx_ASM(F: TColor32; var B: TColor32; M: TColor32);
+asm
{$IFDEF TARGET_x86}
+ // EAX <- F
+ // [EDX] <- B
+ // ECX <- M
-// Check Fa > 0 ?
- TEST EAX,$FF000000 // Fa = 0? => Result := EDX
+ // Check Fa > 0 ?
+ TEST EAX,$FF000000 // Fa = 0? => write nothing
JZ @2
PUSH EBX
@@ -970,8 +1034,10 @@
SHR EBX,24 // EBX <- 00 00 00 Fa
IMUL ECX,EBX // ECX <- 00 00 W **
SHR ECX,8 // ECX <- 00 00 00 W
- JZ @1 // W = 0 ? => Result := EDX
+ JZ @1 // W = 0 ? => write nothing
+ PUSH ESI
+
// P = W * F
MOV EBX,EAX // EBX <- ** Fr Fg Fb
AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
@@ -987,36 +1053,31 @@
OR EAX,EBX // EAX <- 00 Pr Pg Pb
// W = 1 - W; Q = W * B
+ MOV ESI,[EDX]
XOR ECX,$000000FF // ECX <- 1 - ECX
- MOV EBX,EDX // EBX <- 00 Br Bg Bb
- AND EDX,$00FF00FF // EDX <- 00 Br 00 Bb
+ MOV EBX,ESI // EBX <- 00 Br Bg Bb
+ AND ESI,$00FF00FF // ESI <- 00 Br 00 Bb
AND EBX,$0000FF00 // EBX <- 00 00 Bg 00
- IMUL EDX,ECX // EDX <- Qr ** Qb **
+ IMUL ESI,ECX // ESI <- Qr ** Qb **
SHR EBX,8 // EBX <- 00 00 00 Bg
IMUL EBX,ECX // EBX <- 00 00 Qg **
- ADD EDX,bias
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr ** Qb
+ ADD ESI,bias
+ AND ESI,$FF00FF00 // ESI <- Qr 00 Qb 00
+ SHR ESI,8 // ESI <- 00 Qr ** Qb
ADD EBX,bias
AND EBX,$0000FF00 // EBX <- 00 00 Qg 00
- OR EBX,EDX // EBX <- 00 Qr Qg Qb
+ OR EBX,ESI // EBX <- 00 Qr Qg Qb
// Z = P + Q (assuming no overflow at each byte)
ADD EAX,EBX // EAX <- 00 Zr Zg Zb
- POP EBX
- RET
+ MOV [EDX],EAX
+ POP ESI
-@1:
- POP EBX
-
-@2: MOV EAX,EDX
- RET
+@1: POP EBX
+@2: RET
{$ENDIF}
-end;
-procedure BlendMemEx_ASM(F: TColor32; var B: TColor32; M: TColor32);
-asm
{$IFDEF TARGET_x64}
// ECX <- F
// [RDX] <- B
@@ -1075,70 +1136,94 @@
@1: RET
{$ENDIF}
+end;
+
+procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer);
+asm
{$IFDEF TARGET_x86}
- // EAX <- F
- // [EDX] <- B
- // ECX <- M
+ // Blend Count pixels from Src onto Dst, each weighted by its own source alpha.
+ // EAX <- Src
+ // EDX <- Dst
+ // ECX <- Count
- // Check Fa > 0 ?
- TEST EAX,$FF000000 // Fa = 0? => write nothing
- JZ @2
+ // test the counter for zero or negativity
+ TEST ECX,ECX
+ JLE @4 // Count <= 0 => nothing to do (JS alone let Count = 0 wrap the loop counter)
PUSH EBX
+ PUSH ESI
+ PUSH EDI
+ MOV ESI,EAX // ESI <- Src
+ MOV EDI,EDX // EDI <- Dst
+
+ // loop start
+@1: MOV EAX,[ESI]
+ TEST EAX,$FF000000
+ JZ @3 // complete transparency, proceed to next point
+
+ PUSH ECX // store counter
+
// Get weight W = Fa * M
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
- INC ECX // 255:256 range bias
- SHR EBX,24 // EBX <- 00 00 00 Fa
- IMUL ECX,EBX // ECX <- 00 00 W **
- SHR ECX,8 // ECX <- 00 00 00 W
- JZ @1 // W = 0 ? => write nothing
+ MOV ECX,EAX // ECX <- Fa Fr Fg Fb
+ SHR ECX,24 // ECX <- 00 00 00 Fa
- PUSH ESI
+ // Test Fa = 255 ?
+ CMP ECX,$FF
+ JZ @2
// P = W * F
- MOV EBX,EAX // EBX <- ** Fr Fg Fb
+ MOV EBX,EAX // EBX <- Fa Fr Fg Fb
AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND EBX,$0000FF00 // EBX <- 00 00 Fg 00
+ AND EBX,$FF00FF00 // EBX <- Fa 00 Fg 00
IMUL EAX,ECX // EAX <- Pr ** Pb **
- SHR EBX,8 // EBX <- 00 00 00 Fg
- IMUL EBX,ECX // EBX <- 00 00 Pg **
+ SHR EBX,8 // EBX <- 00 Fa 00 Fg
+ IMUL EBX,ECX // EBX <- Pa ** Pg **
ADD EAX,bias
AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
SHR EAX,8 // EAX <- 00 Pr ** Pb
ADD EBX,bias
- AND EBX,$0000FF00 // EBX <- 00 00 Pg 00
- OR EAX,EBX // EAX <- 00 Pr Pg Pb
+ AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
+ OR EAX,EBX // EAX <- Pa Pr Pg Pb
// W = 1 - W; Q = W * B
- MOV ESI,[EDX]
+ MOV EDX,[EDI]
XOR ECX,$000000FF // ECX <- 1 - ECX
- MOV EBX,ESI // EBX <- 00 Br Bg Bb
- AND ESI,$00FF00FF // ESI <- 00 Br 00 Bb
- AND EBX,$0000FF00 // EBX <- 00 00 Bg 00
- IMUL ESI,ECX // ESI <- Qr ** Qb **
- SHR EBX,8 // EBX <- 00 00 00 Bg
- IMUL EBX,ECX // EBX <- 00 00 Qg **
- ADD ESI,bias
- AND ESI,$FF00FF00 // ESI <- Qr 00 Qb 00
- SHR ESI,8 // ESI <- 00 Qr ** Qb
+ MOV EBX,EDX // EBX <- Ba Br Bg Bb
+ AND EDX,$00FF00FF // EDX <- 00 Br 00 Bb
+ AND EBX,$FF00FF00 // EBX <- Ba 00 Bg 00
+ IMUL EDX,ECX // EDX <- Qr ** Qb **
+ SHR EBX,8 // EBX <- 00 Ba 00 Bg
+ IMUL EBX,ECX // EBX <- Qa ** Qg **
+ ADD EDX,bias
+ AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
+ SHR EDX,8 // EDX <- 00 Qr 00 Qb
ADD EBX,bias
- AND EBX,$0000FF00 // EBX <- 00 00 Qg 00
- OR EBX,ESI // EBX <- 00 Qr Qg Qb
+ AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
+ OR EBX,EDX // EBX <- Qa Qr Qg Qb
// Z = P + Q (assuming no overflow at each byte)
- ADD EAX,EBX // EAX <- 00 Zr Zg Zb
+ ADD EAX,EBX // EAX <- Za Zr Zg Zb
+@2:
+ MOV [EDI],EAX
- MOV [EDX],EAX
+ POP ECX // restore counter
+
+@3:
+ ADD ESI,4
+ ADD EDI,4
+
+ // loop end
+ DEC ECX
+ JNZ @1
+
+ POP EDI
POP ESI
+ POP EBX
-@1: POP EBX
-@2: RET
+@4:
+ RET
{$ENDIF}
-end;
-procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer);
-asm
{$IFDEF TARGET_x64}
// RCX <- Src
// RDX <- Dst
@@ -1218,89 +1303,6 @@
@4:
RET
{$ENDIF}
-{$IFDEF TARGET_x86}
- // EAX <- Src
- // EDX <- Dst
- // ECX <- Count
-
- // test the counter for zero or negativity
- TEST ECX,ECX
- JS @4
-
- PUSH EBX
- PUSH ESI
- PUSH EDI
-
- MOV ESI,EAX // ESI <- Src
- MOV EDI,EDX // EDI <- Dst
-
- // loop start
-@1: MOV EAX,[ESI]
- TEST EAX,$FF000000
- JZ @3 // complete transparency, proceed to next point
-
- PUSH ECX // store counter
-
- // Get weight W = Fa * M
- MOV ECX,EAX // ECX <- Fa Fr Fg Fb
- SHR ECX,24 // ECX <- 00 00 00 Fa
-
- // Test Fa = 255 ?
- CMP ECX,$FF
- JZ @2
-
- // P = W * F
- MOV EBX,EAX // EBX <- Fa Fr Fg Fb
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND EBX,$FF00FF00 // EBX <- Fa 00 Fg 00
- IMUL EAX,ECX // EAX <- Pr ** Pb **
- SHR EBX,8 // EBX <- 00 Fa 00 Fg
- IMUL EBX,ECX // EBX <- Pa ** Pg **
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
- SHR EAX,8 // EAX <- 00 Pr ** Pb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Pa 00 Pg 00
- OR EAX,EBX // EAX <- Pa Pr Pg Pb
-
- // W = 1 - W; Q = W * B
- MOV EDX,[EDI]
- XOR ECX,$000000FF // ECX <- 1 - ECX
- MOV EBX,EDX // EBX <- Ba Br Bg Bb
- AND EDX,$00FF00FF // ESI <- 00 Br 00 Bb
- AND EBX,$FF00FF00 // EBX <- Ba 00 Bg 00
- IMUL EDX,ECX // ESI <- Qr ** Qb **
- SHR EBX,8 // EBX <- 00 Ba 00 Bg
- IMUL EBX,ECX // EBX <- Qa ** Qg **
- ADD EDX,bias
- AND EDX,$FF00FF00 // ESI <- Qr 00 Qb 00
- SHR EDX,8 // ESI <- 00 Qr ** Qb
- ADD EBX,bias
- AND EBX,$FF00FF00 // EBX <- Qa 00 Qg 00
- OR EBX,EDX // EBX <- Qa Qr Qg Qb
-
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,EBX // EAX <- Za Zr Zg Zb
-@2:
- MOV [EDI],EAX
-
- POP ECX // restore counter
-
-@3:
- ADD ESI,4
- ADD EDI,4
-
- // loop end
- DEC ECX
- JNZ @1
-
- POP EDI
- POP ESI
- POP EBX
-
-@4:
- RET
-{$ENDIF}
end;
{$IFDEF TARGET_x86}
@@ -1487,58 +1489,12 @@
asm
// combine RGBA channels of colors X and Y with the weight of X given in W
// Result Z = W * X + (1 - W) * Y (all channels are combined, including alpha)
+{$IFDEF TARGET_x86}
// EAX <- X
// EDX <- Y
// ECX <- W
-{$IFDEF TARGET_x64}
-
// W = 0 or $FF?
- TEST R8,R8
- JZ @1 // W = 0 ? => Result := EDX
- MOV EAX,ECX // EAX <- Xa Xr Xg Xb
- CMP R8B,$FF // W = $FF ? => Result := EDX
- JE @2
-
- // P = W * X
- AND EAX,$00FF00FF // EAX <- 00 Xr 00 Xb
- AND ECX,$FF00FF00 // ECX <- Xa 00 Xg 00
- IMUL EAX,R8D // EAX <- Pr ** Pb **
- SHR ECX,8 // ECX <- 00 Xa 00 Xg
- IMUL ECX,R8D // ECX <- Pa ** Pg **
- ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pa 00 Pg 00
- SHR EAX,8 // EAX <- 00 Pr 00 Pb
- ADD ECX,bias
- AND ECX,$FF00FF00 // ECX <- Pa 00 Pg 00
- OR EAX,ECX // EAX <- Pa Pr Pg Pb
-
- // W = 1 - W; Q = W * Y
- XOR R8D,$000000FF // R8D <- 1 - R8D
- MOV ECX,EDX // ECX <- Ya Yr Yg Yb
- AND EDX,$00FF00FF // EDX <- 00 Yr 00 Yb
- AND ECX,$FF00FF00 // ECX <- Ya 00 Yg 00
- IMUL EDX,R8D // EDX <- Qr ** Qb **
- SHR ECX,8 // ECX <- 00 Ya 00 Yg
- IMUL ECX,R8D // ECX <- Qa ** Qg **
- ADD EDX,bias
- AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
- SHR EDX,8 // EDX <- 00 Qr ** Qb
- ADD ECX,bias
- AND ECX,$FF00FF00 // ECX <- Qa 00 Qg 00
- OR ECX,EDX // ECX <- Qa Qr Qg Qb
-
- // Z = P + Q (assuming no overflow at each byte)
- ADD EAX,ECX // EAX <- Za Zr Zg Zb
-
- RET
-
-@1: MOV EAX,EDX
-@2: RET
-{$ENDIF}
-{$IFDEF TARGET_x86}
-
- // W = 0 or $FF?
JCXZ @1 // CX = 0 ? => Result := EDX
CMP ECX,$FF // CX = $FF ? => Result := EDX
JE @2
@@ -1583,60 +1539,59 @@
@1: MOV EAX,EDX
@2: RET
{$ENDIF}
-end;
-procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: TColor32);
-asm
{$IFDEF TARGET_x64}
- // ECX <- F
- // [RDX] <- B
- // R8 <- W
+ // ECX <- X
+ // EDX <- Y
+ // R8D <- W
- // Check W
- TEST R8,R8 // Set flags for R8
- JZ @2 // W = 0 ? => Result := EDX
- MOV EAX,ECX // EAX <- ** Fr Fg Fb
- CMP R8B,$FF // W = 255? => write F
- JZ @1
+ // W = 0 or $FF?
+ TEST R8D,R8D
+ JZ @1 // W = 0 ? => Result := EDX
+ MOV EAX,ECX // EAX <- Xa Xr Xg Xb
+ CMP R8B,$FF // W = $FF ? => Result := EDX
+ JE @2
- // P = W * F
- AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
- AND ECX,$FF00FF00 // ECX <- Fa 00 Fg 00
+ // P = W * X
+ AND EAX,$00FF00FF // EAX <- 00 Xr 00 Xb
+ AND ECX,$FF00FF00 // ECX <- Xa 00 Xg 00
IMUL EAX,R8D // EAX <- Pr ** Pb **
- SHR ECX,8 // ECX <- 00 Fa 00 Fg
- IMUL ECX,R8D // ECX <- 00 00 Pg **
+ SHR ECX,8 // ECX <- 00 Xa 00 Xg
+ IMUL ECX,R8D // ECX <- Pa ** Pg **
ADD EAX,bias
- AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
+ AND EAX,$FF00FF00 // EAX <- Pa 00 Pg 00
SHR EAX,8 // EAX <- 00 Pr 00 Pb
ADD ECX,bias
AND ECX,$FF00FF00 // ECX <- Pa 00 Pg 00
- OR EAX,ECX // EAX <- 00 Pr Pg Pb
+ OR EAX,ECX // EAX <- Pa Pr Pg Pb
- // W = 1 - W; Q = W * B
- MOV R9D,[EDX]
+ // W = 1 - W; Q = W * Y
XOR R8D,$000000FF // R8D <- 1 - R8D
- MOV ECX,R9D // ECX <- Ba Br Bg Bb
- AND R9D,$00FF00FF // R9D <- 00 Br 00 Bb
- AND ECX,$FF00FF00 // ECX <- Ba 00 Bg 00
- IMUL R9D,R8D // R9D <- Qr ** Qb **
- SHR ECX,8 // ECX <- 00 Ba 00 Bg
- IMUL ECX,R8D // ECX <- Qa 00 Qg **
- ADD R9D,bias
- AND R9D,$FF00FF00 // R9D <- Qr 00 Qb 00
- SHR R9D,8 // R9D <- 00 Qr ** Qb
+ MOV ECX,EDX // ECX <- Ya Yr Yg Yb
+ AND EDX,$00FF00FF // EDX <- 00 Yr 00 Yb
+ AND ECX,$FF00FF00 // ECX <- Ya 00 Yg 00
+ IMUL EDX,R8D // EDX <- Qr ** Qb **
+ SHR ECX,8 // ECX <- 00 Ya 00 Yg
+ IMUL ECX,R8D // ECX <- Qa ** Qg **
+ ADD EDX,bias
+ AND EDX,$FF00FF00 // EDX <- Qr 00 Qb 00
+ SHR EDX,8 // EDX <- 00 Qr ** Qb
ADD ECX,bias
AND ECX,$FF00FF00 // ECX <- Qa 00 Qg 00
- OR ECX,R9D // ECX <- 00 Qr Qg Qb
+ OR ECX,EDX // ECX <- Qa Qr Qg Qb
// Z = P + Q (assuming no overflow at each byte)
- ADD EAX,ECX // EAX <- 00 Zr Zg Zb
+ ADD EAX,ECX // EAX <- Za Zr Zg Zb
- MOV [EDX],EAX
+ RET
-@1: MOV [EDX],EAX
+@1: MOV EAX,EDX
@2: RET
+{$ENDIF}
+end;
-{$ENDIF}
+procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: TColor32);
+asm
{$IFDEF TARGET_x86}
// EAX <- F
// [EDX] <- B
@@ -1697,6 +1652,57 @@
@2: MOV [EDX],EAX
RET
{$ENDIF}
+
+{$IFDEF TARGET_x64}
+ // Combine colour X into the pixel stored at Y: Y := W * X + (1 - W) * Y,
+ // all four channels including alpha.
+ // ECX <- X, [RDX] <- Y (64-bit pointer), R8D <- W
+
+ // Check W
+ TEST R8D,R8D // Set flags for R8D
+ JZ @2 // W = 0 ? => leave [RDX] untouched
+ MOV EAX,ECX // EAX <- Fa Fr Fg Fb
+ CMP R8B,$FF // W = 255? => write F
+ JZ @1
+
+ // P = W * F
+ AND EAX,$00FF00FF // EAX <- 00 Fr 00 Fb
+ AND ECX,$FF00FF00 // ECX <- Fa 00 Fg 00
+ IMUL EAX,R8D // EAX <- Pr ** Pb **
+ SHR ECX,8 // ECX <- 00 Fa 00 Fg
+ IMUL ECX,R8D // ECX <- Pa ** Pg **
+ ADD EAX,bias
+ AND EAX,$FF00FF00 // EAX <- Pr 00 Pb 00
+ SHR EAX,8 // EAX <- 00 Pr 00 Pb
+ ADD ECX,bias
+ AND ECX,$FF00FF00 // ECX <- Pa 00 Pg 00
+ OR EAX,ECX // EAX <- Pa Pr Pg Pb
+
+ // W = 255 - W; Q = W * B
+ MOV R9D,[RDX] // must address through RDX: [EDX] would truncate the pointer to 32 bits
+ XOR R8D,$000000FF // R8D <- 255 - W (W is 1..254 on this path)
+ MOV ECX,R9D // ECX <- Ba Br Bg Bb
+ AND R9D,$00FF00FF // R9D <- 00 Br 00 Bb
+ AND ECX,$FF00FF00 // ECX <- Ba 00 Bg 00
+ IMUL R9D,R8D // R9D <- Qr ** Qb **
+ SHR ECX,8 // ECX <- 00 Ba 00 Bg
+ IMUL ECX,R8D // ECX <- Qa ** Qg **
+ ADD R9D,bias
+ AND R9D,$FF00FF00 // R9D <- Qr 00 Qb 00
+ SHR R9D,8 // R9D <- 00 Qr 00 Qb
+ ADD ECX,bias
+ AND ECX,$FF00FF00 // ECX <- Qa 00 Qg 00
+ OR ECX,R9D // ECX <- Qa Qr Qg Qb
+
+ // Z = P + Q (assuming no overflow at each byte)
+ ADD EAX,ECX // EAX <- Za Zr Zg Zb
+
+ // fall through: @1 stores the result (the former duplicate store was removed)
+
+@1: MOV [RDX],EAX // write either the blended pixel or, for W = 255, F itself
+@2: RET
+
+{$ENDIF}
end;
procedure EMMS_ASM;
@@ -1767,17 +1773,17 @@
function BlendReg_MMX(F, B: TColor32): TColor32;
asm
-{$IFDEF TARGET_x64}
// blend foreground color (F) to a background color (B),
// using alpha channel value of F
- // ECX <- F
+{$IFDEF TARGET_x86}
+ // EAX <- F
// EDX <- B
// Result := Fa * (Frgb - Brgb) + Brgb
- MOVD MM0,ECX
+ MOVD MM0,EAX
PXOR MM3,MM3
MOVD MM2,EDX
PUNPCKLBW MM0,MM3
- MOV RAX,bias_ptr
+ MOV ECX,bias_ptr
PUNPCKLBW MM2,MM3
MOVQ MM1,MM0
PUNPCKHWD MM1,MM1
@@ -1785,23 +1791,22 @@
PUNPCKHDQ MM1,MM1
PSLLW MM2,8
PMULLW MM0,MM1
- PADDW MM2,[RAX]
+ PADDW MM2,[ECX]
PADDW MM2,MM0
PSRLW MM2,8
PACKUSWB MM2,MM3
MOVD EAX,MM2
{$ENDIF}
-{$IFDEF TARGET_x86}
- // blend foreground color (F) to a background color (B),
- // using alpha channel value of F
- // EAX <- F
+
+{$IFDEF TARGET_x64}
+ // ECX <- F
// EDX <- B
// Result := Fa * (Frgb - Brgb) + Brgb
- MOVD MM0,EAX
+ MOVD MM0,ECX
PXOR MM3,MM3
MOVD MM2,EDX
PUNPCKLBW MM0,MM3
- MOV ECX,bias_ptr
+ MOV RAX,bias_ptr
PUNPCKLBW MM2,MM3
MOVQ MM1,MM0
PUNPCKHWD MM1,MM1
@@ -1809,7 +1814,7 @@
PUNPCKHDQ MM1,MM1
PSLLW MM2,8
PMULLW MM0,MM1
- PADDW MM2,[ECX]
+ PADDW MM2,[RAX]
PADDW MM2,MM0
PSRLW MM2,8
PACKUSWB MM2,MM3
@@ -1897,43 +1902,6 @@
procedure BlendMemEx_MMX(F: TColor32; var B:TColor32; M: TColor32);
asm
-{$IFDEF TARGET_x64}
- // blend foreground color (F) to a background color (B),
- // using alpha channel value of F
- // ECX <- F
- // [EDX] <- B
- // R8 <- M
- // Result := M * Fa * (Frgb - Brgb) + Brgb
- TEST ECX,$FF000000
- JZ @1
-
- MOV EAX,ECX
- SHR EAX,24
- INC R8 // 255:256 range bias
- IMUL R8,EAX
- SHR R8,8
- JZ @1
-
- PXOR MM0,MM0
- MOVD MM1,ECX
- SHL R8,4
- MOVD MM2,[RDX]
- PUNPCKLBW MM1,MM0
- PUNPCKLBW MM2,MM0
- ADD R8,alpha_ptr
- PSUBW MM1,MM2
- PMULLW MM1,[R8]
- PSLLW MM2,8
- MOV RAX,bias_ptr
- PADDW MM2,[RAX]
- PADDW MM1,MM2
- PSRLW MM1,8
- PACKUSWB MM1,MM0
- MOVD [RDX],MM1
-
-@1:
-{$ENDIF}
-
{$IFDEF TARGET_x86}
// blend foreground color (F) to a background color (B),
// using alpha channel value of F
@@ -1973,6 +1941,43 @@
@2:
{$ENDIF}
+
+{$IFDEF TARGET_x64}
+ // blend foreground color (F) to a background color (B),
+ // using alpha channel value of F scaled by master alpha M
+ // ECX <- F
+ // [RDX] <- B
+ // R8D <- M (32-bit argument: the upper half of R8 is not relied upon)
+ // Result := M * Fa * (Frgb - Brgb) + Brgb
+ TEST ECX,$FF000000
+ JZ @1 // Fa = 0 => write nothing
+
+ MOV EAX,ECX
+ SHR EAX,24 // EAX <- Fa
+ INC R8D // 255:256 range bias; the 32-bit write also zero-extends into R8
+ IMUL R8D,EAX // matching 32-bit operands (was IMUL R8,EAX)
+ SHR R8D,8 // R8 <- W = (M + 1) * Fa shr 8, upper 32 bits are zero
+ JZ @1 // W = 0 => write nothing
+
+ PXOR MM0,MM0
+ MOVD MM1,ECX
+ SHL R8,4 // table offset = W * 16
+ MOVD MM2,[RDX]
+ PUNPCKLBW MM1,MM0 // MM1 <- F channels widened to words
+ PUNPCKLBW MM2,MM0 // MM2 <- B channels widened to words
+ ADD R8,alpha_ptr // alpha_ptr is declared outside this block; presumably a per-weight multiplier table - TODO confirm
+ PSUBW MM1,MM2 // MM1 <- F - B
+ PMULLW MM1,[R8] // MM1 <- W * (F - B)
+ PSLLW MM2,8 // MM2 <- B * 256
+ MOV RAX,bias_ptr
+ PADDW MM2,[RAX]
+ PADDW MM1,MM2
+ PSRLW MM1,8 // scale back to 8 bits per channel
+ PACKUSWB MM1,MM0
+ MOVD [RDX],MM1 // store the blended pixel to B
+
+@1:
+{$ENDIF}
end;
{$IFDEF TARGET_x86}
@@ -2099,6 +2104,35 @@
function CombineReg_MMX(X, Y, W: TColor32): TColor32;
asm
+{$IFDEF TARGET_X86}
+ // EAX - Color X
+ // EDX - Color Y
+ // ECX - Weight of X [0..255]
+ // Result := W * (X - Y) + Y
+
+ MOVD MM1,EAX // MM1 <- X
+ PXOR MM0,MM0 // MM0 <- 0, used for widening and for the final pack
+ SHL ECX,4 // ECX <- W * 16, byte offset into the table at alpha_ptr
+
+ MOVD MM2,EDX // MM2 <- Y
+ PUNPCKLBW MM1,MM0 // X channels widened from bytes to words
+ PUNPCKLBW MM2,MM0 // Y channels widened from bytes to words
+
+ ADD ECX,alpha_ptr // alpha_ptr is declared outside this block; presumably a per-weight multiplier table - TODO confirm
+
+ PSUBW MM1,MM2 // MM1 <- X - Y
+ PMULLW MM1,[ECX] // MM1 <- (X - Y) * table entry for W
+ PSLLW MM2,8 // MM2 <- Y * 256
+
+ MOV ECX,bias_ptr // ECX is free again: W is no longer needed
+
+ PADDW MM2,[ECX] // add the words stored at bias_ptr (presumably a rounding term - TODO confirm)
+ PADDW MM1,MM2
+ PSRLW MM1,8 // scale back to 8 bits per channel
+ PACKUSWB MM1,MM0 // words -> bytes with unsigned saturation
+ MOVD EAX,MM1 // Result is returned in EAX
+{$ENDIF}
+
{$IFDEF TARGET_X64}
// ECX - Color X
// EDX - Color Y
@@ -2127,17 +2161,26 @@
PACKUSWB MM1,MM0
MOVD EAX,MM1
{$ENDIF}
+end;
+
+procedure CombineMem_MMX(F: TColor32; var B: TColor32; W: TColor32);
+asm
{$IFDEF TARGET_X86}
// EAX - Color X
- // EDX - Color Y
+ // [EDX] - Color Y
// ECX - Weight of X [0..255]
// Result := W * (X - Y) + Y
+ JCXZ @1
+ CMP ECX,$FF
+ JZ @2
+
MOVD MM1,EAX
PXOR MM0,MM0
+
SHL ECX,4
- MOVD MM2,EDX
+ MOVD MM2,[EDX]
PUNPCKLBW MM1,MM0
PUNPCKLBW MM2,MM0
@@ -2153,12 +2196,13 @@
PADDW MM1,MM2
PSRLW MM1,8
PACKUSWB MM1,MM0
- MOVD EAX,MM1
+ MOVD [EDX],MM1
+
+@1: RET
+
+@2: MOV [EDX],EAX
{$ENDIF}
-end;
-procedure CombineMem_MMX(F: TColor32; var B: TColor32; W: TColor32);
-asm
{$IFDEF TARGET_x64}
// ECX - Color X
// [RDX] - Color Y
@@ -2197,43 +2241,6 @@
@2: MOV [RDX],RCX
{$ENDIF}
-{$IFDEF TARGET_X86}
- // EAX - Color X
- // [EDX] - Color Y
- // ECX - Weight of X [0..255]
- // Result := W * (X - Y) + Y
-
- JCXZ @1
- CMP ECX,$FF
- JZ @2
-
- MOVD MM1,EAX
- PXOR MM0,MM0
-
- SHL ECX,4
-
- MOVD MM2,[EDX]
- PUNPCKLBW MM1,MM0
- PUNPCKLBW MM2,MM0
-
- ADD ECX,alpha_ptr
-
- PSUBW MM1,MM2
- PMULLW MM1,[ECX]
- PSLLW MM2,8
-
- MOV ECX,bias_ptr
-
- PADDW MM2,[ECX]
- PADDW MM1,MM2
- PSRLW MM1,8
- PACKUSWB MM1,MM0
- MOVD [EDX],MM1
-
-@1: RET
-
-@2: MOV [EDX],EAX
-{$ENDIF}
end;
{$IFDEF TARGET_x86}
@@ -2303,9 +2310,9 @@
function LightenReg_MMX(C: TColor32; Amount: Integer): TColor32;
asm
-{$IFDEF TARGET_X64}
+{$IFDEF TARGET_X86}
MOVD MM1, EDX
- MOVD MM0, ECX
+ MOVD MM0, EAX
MOVQ MM2, MM1
PSLLW MM2, 8
POR MM2, MM1
@@ -2314,9 +2321,10 @@
PADDUSB MM0, MM1
MOVD EAX, MM0
{$ENDIF}
-{$IFDEF TARGET_X86}
+
+{$IFDEF TARGET_X64}
MOVD MM1, EDX
- MOVD MM0, EAX
+ MOVD MM0, ECX
MOVQ MM2, MM1
PSLLW MM2, 8
POR MM2, MM1
@@ -2331,14 +2339,15 @@
function ColorAdd_MMX(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
- MOVD MM0,ECX
+{$IFDEF TARGET_X86}
+ MOVD MM0,EAX
MOVD MM1,EDX
PADDUSB MM0,MM1
MOVD EAX,MM0
{$ENDIF}
-{$IFDEF TARGET_X86}
- MOVD MM0,EAX
+
+{$IFDEF TARGET_X64}
+ MOVD MM0,ECX
MOVD MM1,EDX
PADDUSB MM0,MM1
MOVD EAX,MM0
@@ -2347,14 +2356,15 @@
function ColorSub_MMX(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
- MOVD MM0,ECX
+{$IFDEF TARGET_X86}
+ MOVD MM0,EAX
MOVD MM1,EDX
PSUBUSB MM0,MM1
MOVD EAX,MM0
{$ENDIF}
-{$IFDEF TARGET_X86}
- MOVD MM0,EAX
+
+{$IFDEF TARGET_X64}
+ MOVD MM0,ECX
MOVD MM1,EDX
PSUBUSB MM0,MM1
MOVD EAX,MM0
@@ -2363,9 +2373,9 @@
function ColorModulate_MMX(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
+{$IFDEF TARGET_X86}
PXOR MM2,MM2
- MOVD MM0,ECX
+ MOVD MM0,EAX
PUNPCKLBW MM0,MM2
MOVD MM1,EDX
PUNPCKLBW MM1,MM2
@@ -2374,9 +2384,10 @@
PACKUSWB MM0,MM2
MOVD EAX,MM0
{$ENDIF}
-{$IFDEF TARGET_X86}
+
+{$IFDEF TARGET_X64}
PXOR MM2,MM2
- MOVD MM0,EAX
+ MOVD MM0,ECX
PUNPCKLBW MM0,MM2
MOVD MM1,EDX
PUNPCKLBW MM1,MM2
@@ -2389,14 +2400,15 @@
function ColorMax_EMMX(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
- MOVD MM0,ECX
+{$IFDEF TARGET_X86}
+ MOVD MM0,EAX
MOVD MM1,EDX
PMAXUB MM0,MM1
MOVD EAX,MM0
{$ENDIF}
-{$IFDEF TARGET_X86}
- MOVD MM0,EAX
+
+{$IFDEF TARGET_X64}
+ MOVD MM0,ECX
MOVD MM1,EDX
PMAXUB MM0,MM1
MOVD EAX,MM0
@@ -2405,14 +2417,15 @@
function ColorMin_EMMX(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
- MOVD MM0,ECX
+{$IFDEF TARGET_X86}
+ MOVD MM0,EAX
MOVD MM1,EDX
PMINUB MM0,MM1
MOVD EAX,MM0
{$ENDIF}
-{$IFDEF TARGET_X86}
- MOVD MM0,EAX
+
+{$IFDEF TARGET_X64}
+ MOVD MM0,ECX
MOVD MM1,EDX
PMINUB MM0,MM1
MOVD EAX,MM0
@@ -2421,8 +2434,8 @@
function ColorDifference_MMX(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
- MOVD MM0,ECX
+{$IFDEF TARGET_X86}
+ MOVD MM0,EAX
MOVD MM1,EDX
MOVQ MM2,MM0
PSUBUSB MM0,MM1
@@ -2430,8 +2443,9 @@
POR MM0,MM1
MOVD EAX,MM0
{$ENDIF}
-{$IFDEF TARGET_X86}
- MOVD MM0,EAX
+
+{$IFDEF TARGET_X64}
+ MOVD MM0,ECX
MOVD MM1,EDX
MOVQ MM2,MM0
PSUBUSB MM0,MM1
@@ -2443,9 +2457,9 @@
function ColorExclusion_MMX(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
+{$IFDEF TARGET_X86}
PXOR MM2,MM2
- MOVD MM0,ECX
+ MOVD MM0,EAX
PUNPCKLBW MM0,MM2
MOVD MM1,EDX
PUNPCKLBW MM1,MM2
@@ -2457,9 +2471,10 @@
PACKUSWB MM0,MM2
MOVD EAX,MM0
{$ENDIF}
-{$IFDEF TARGET_X86}
+
+{$IFDEF TARGET_X64}
PXOR MM2,MM2
- MOVD MM0,EAX
+ MOVD MM0,ECX
PUNPCKLBW MM0,MM2
MOVD MM1,EDX
PUNPCKLBW MM1,MM2
@@ -2475,24 +2490,25 @@
function ColorScale_MMX(C, W: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
+{$IFDEF TARGET_X86}
PXOR MM2,MM2
- SHL RDX,4
- MOVD MM0,ECX
+ SHL EDX,4
+ MOVD MM0,EAX
PUNPCKLBW MM0,MM2
- ADD RDX,alpha_ptr
- PMULLW MM0,[RDX]
+ ADD EDX,alpha_ptr
+ PMULLW MM0,[EDX]
PSRLW MM0,8
PACKUSWB MM0,MM2
MOVD EAX,MM0
{$ENDIF}
-{$IFDEF TARGET_X86}
+
+{$IFDEF TARGET_X64}
PXOR MM2,MM2
- SHL EDX,4
- MOVD MM0,EAX
+ SHL RDX,4
+ MOVD MM0,ECX
PUNPCKLBW MM0,MM2
- ADD EDX,alpha_ptr
- PMULLW MM0,[EDX]
+ ADD RDX,alpha_ptr
+ PMULLW MM0,[RDX]
PSRLW MM0,8
PACKUSWB MM0,MM2
MOVD EAX,MM0
@@ -2510,37 +2526,38 @@
// EDX <- B
// Result := Fa * (Frgb - Brgb) + Brgb
-{$IFDEF TARGET_x64}
- MOVD XMM0,ECX
+{$IFDEF TARGET_x86}
+ MOVD XMM0,EAX
PXOR XMM3,XMM3
MOVD XMM2,EDX
PUNPCKLBW XMM0,XMM3
- MOV RAX,bias_ptr
+ MOV ECX,bias_ptr
PUNPCKLBW XMM2,XMM3
MOVQ XMM1,XMM0
PSHUFLW XMM1,XMM1, $FF
PSUBW XMM0,XMM2
PSLLW XMM2,8
PMULLW XMM0,XMM1
- PADDW XMM2,[RAX]
+ PADDW XMM2,[ECX]
PADDW XMM2,XMM0
PSRLW XMM2,8
PACKUSWB XMM2,XMM3
MOVD EAX,XMM2
{$ENDIF}
-{$IFDEF TARGET_x86}
- MOVD XMM0,EAX
+
+{$IFDEF TARGET_x64}
+ MOVD XMM0,ECX
PXOR XMM3,XMM3
MOVD XMM2,EDX
PUNPCKLBW XMM0,XMM3
- MOV ECX,bias_ptr
+ MOV RAX,bias_ptr
PUNPCKLBW XMM2,XMM3
MOVQ XMM1,XMM0
PSHUFLW XMM1,XMM1, $FF
PSUBW XMM0,XMM2
PSLLW XMM2,8
PMULLW XMM0,XMM1
- PADDW XMM2,[ECX]
+ PADDW XMM2,[RAX]
PADDW XMM2,XMM0
PSRLW XMM2,8
PACKUSWB XMM2,XMM3
@@ -2550,68 +2567,68 @@
procedure BlendMem_SSE2(F: TColor32; var B: TColor32);
asm
-{$IFDEF TARGET_x64}
- // ECX - Color X
+{$IFDEF TARGET_x86}
+ // EAX - Color X
// [EDX] - Color Y
// Result := W * (X - Y) + Y
- TEST ECX,$FF000000
+ TEST EAX,$FF000000
JZ @1
- CMP ECX,$FF000000
+ CMP EAX,$FF000000
JNC @2
PXOR XMM3,XMM3
- MOVD XMM0,ECX
- MOVD XMM2,[RDX]
+ MOVD XMM0,EAX
+ MOVD XMM2,[EDX]
PUNPCKLBW XMM0,XMM3
- MOV RAX,bias_ptr
+ MOV ECX,bias_ptr
PUNPCKLBW XMM2,XMM3
MOVQ XMM1,XMM0
PSHUFLW XMM1,XMM1, $FF
PSUBW XMM0,XMM2
PSLLW XMM2,8
PMULLW XMM0,XMM1
- PADDW XMM2,[RAX]
+ PADDW XMM2,[ECX]
PADDW XMM2,XMM0
PSRLW XMM2,8
PACKUSWB XMM2,XMM3
- MOVD [RDX],XMM2
+ MOVD [EDX],XMM2
@1: RET
-@2: MOV [RDX], ECX
+@2: MOV [EDX], EAX
{$ENDIF}
-{$IFDEF TARGET_x86}
- // EAX - Color X
+{$IFDEF TARGET_x64}
+ // ECX - Color X
// [EDX] - Color Y
// Result := W * (X - Y) + Y
- TEST EAX,$FF000000
+ TEST ECX,$FF000000
JZ @1
- CMP EAX,$FF000000
+ CMP ECX,$FF000000
JNC @2
PXOR XMM3,XMM3
- MOVD XMM0,EAX
- MOVD XMM2,[EDX]
+ MOVD XMM0,ECX
+ MOVD XMM2,[RDX]
PUNPCKLBW XMM0,XMM3
- MOV ECX,bias_ptr
+ MOV RAX,bias_ptr
PUNPCKLBW XMM2,XMM3
MOVQ XMM1,XMM0
PSHUFLW XMM1,XMM1, $FF
PSUBW XMM0,XMM2
PSLLW XMM2,8
PMULLW XMM0,XMM1
- PADDW XMM2,[ECX]
+ PADDW XMM2,[RAX]
PADDW XMM2,XMM0
PSRLW XMM2,8
PACKUSWB XMM2,XMM3
- MOVD [EDX],XMM2
+ MOVD [RDX],XMM2
@1: RET
-@2: MOV [EDX], EAX
+@2: MOV [RDX], ECX
{$ENDIF}
end;
@@ -2621,6 +2638,42 @@
// using alpha channel value of F
// Result := M * Fa * (Frgb - Brgb) + Brgb
+{$IFDEF TARGET_x86}
+ // EAX <- F
+ // EDX <- B
+ // ECX <- M
+ PUSH EBX // EBX is used as scratch and restored on both exits
+ MOV EBX,EAX
+ SHR EBX,24 // EBX <- Fa
+ INC ECX // 255:256 range bias
+ IMUL ECX,EBX
+ SHR ECX,8 // ECX <- W = (M + 1) * Fa shr 8
+ JZ @1 // W = 0 => Result := B
+
+ PXOR XMM0,XMM0 // XMM0 <- 0, used for widening and for the final pack
+ MOVD XMM1,EAX // XMM1 <- F
+ SHL ECX,4 // ECX <- W * 16, byte offset into the table at alpha_ptr
+ MOVD XMM2,EDX // XMM2 <- B
+ PUNPCKLBW XMM1,XMM0 // F channels widened from bytes to words
+ PUNPCKLBW XMM2,XMM0 // B channels widened from bytes to words
+ ADD ECX,alpha_ptr // alpha_ptr is declared outside this block; presumably a per-weight multiplier table - TODO confirm
+ PSUBW XMM1,XMM2 // XMM1 <- F - B
+ PMULLW XMM1,[ECX] // NOTE(review): 16-byte memory operand; confirm the table entries are 16-byte aligned
+ PSLLW XMM2,8 // XMM2 <- B * 256
+ MOV ECX,bias_ptr
+ PADDW XMM2,[ECX] // NOTE(review): 16-byte memory operand; confirm the data at bias_ptr is 16-byte aligned
+ PADDW XMM1,XMM2
+ PSRLW XMM1,8 // scale back to 8 bits per channel
+ PACKUSWB XMM1,XMM0 // words -> bytes with unsigned saturation
+ MOVD EAX,XMM1 // Result is returned in EAX
+
+ POP EBX
+ RET
+
+@1: MOV EAX,EDX // nothing to blend: return B unchanged
+ POP EBX
+{$ENDIF}
+
{$IFDEF TARGET_x64}
// ECX <- F
// EDX <- B
@@ -2653,11 +2706,20 @@
@1: MOV EAX,EDX
{$ENDIF}
+end;
+procedure BlendMemEx_SSE2(F: TColor32; var B:TColor32; M: TColor32);
+asm
{$IFDEF TARGET_x86}
+ // blend foreground color (F) to a background color (B),
+ // using alpha channel value of F
// EAX <- F
- // EDX <- B
+ // [EDX] <- B
// ECX <- M
+ // Result := M * Fa * (Frgb - Brgb) + Brgb
+ TEST EAX,$FF000000
+ JZ @2
+
PUSH EBX
MOV EBX,EAX
SHR EBX,24
@@ -2669,7 +2731,7 @@
PXOR XMM0,XMM0
MOVD XMM1,EAX
SHL ECX,4
- MOVD XMM2,EDX
+ MOVD XMM2,[EDX]
PUNPCKLBW XMM1,XMM0
PUNPCKLBW XMM2,XMM0
ADD ECX,alpha_ptr
@@ -2681,18 +2743,14 @@
PADDW XMM1,XMM2
PSRLW XMM1,8
PACKUSWB XMM1,XMM0
- MOVD EAX,XMM1
+ MOVD [EDX],XMM1
+@1:
POP EBX
- RET
-@1: MOV EAX,EDX
- POP EBX
+@2:
{$ENDIF}
-end;
-procedure BlendMemEx_SSE2(F: TColor32; var B:TColor32; M: TColor32);
-asm
{$IFDEF TARGET_x64}
// blend foreground color (F) to a background color (B),
// using alpha channel value of F
@@ -2729,63 +2787,25 @@
MOVD DWORD PTR [RDX],XMM1
@1:
{$ENDIF}
-
-{$IFDEF TARGET_x86}
- // blend foreground color (F) to a background color (B),
- // using alpha channel value of F
- // EAX <- F
- // [EDX] <- B
- // ECX <- M
- // Result := M * Fa * (Frgb - Brgb) + Brgb
- TEST EAX,$FF000000
- JZ @2
-
- PUSH EBX
- MOV EBX,EAX
- SHR EBX,24
- INC ECX // 255:256 range bias
- IMUL ECX,EBX
- SHR ECX,8
- JZ @1
-
- PXOR XMM0,XMM0
- MOVD XMM1,EAX
- SHL ECX,4
- MOVD XMM2,[EDX]
- PUNPCKLBW XMM1,XMM0
- PUNPCKLBW XMM2,XMM0
- ADD ECX,alpha_ptr
- PSUBW XMM1,XMM2
- PMULLW XMM1,[ECX]
- PSLLW XMM2,8
- MOV ECX,bias_ptr
- PADDW XMM2,[ECX]
- PADDW XMM1,XMM2
- PSRLW XMM1,8
- PACKUSWB XMM1,XMM0
- MOVD [EDX],XMM1
-
-@1:
- POP EBX
-
-@2:
-{$ENDIF}
end;
procedure BlendLine_SSE2(Src, Dst: PColor32; Count: Integer);
asm
-{$IFDEF TARGET_X64}
- // ECX <- Src
+{$IFDEF TARGET_X86}
+ // EAX <- Src
// EDX <- Dst
- // R8D <- Count
+ // ECX <- Count
- TEST R8D,R8D
+ TEST ECX,ECX
JZ @4
- MOV R10,RCX
- MOV R11,RDX
+ PUSH ESI
+ PUSH EDI
-@1: MOV EAX,[R10]
+ MOV ESI,EAX
+ MOV EDI,EDX
+
+@1: MOV EAX,[ESI]
TEST EAX,$FF000000
JZ @3
CMP EAX,$FF000000
@@ -2793,9 +2813,9 @@
MOVD XMM0,EAX
PXOR XMM3,XMM3
- MOVD XMM2,[R11]
+ MOVD XMM2,[EDI]
PUNPCKLBW XMM0,XMM3
- MOV RAX,bias_ptr
+ MOV EAX,bias_ptr
PUNPCKLBW XMM2,XMM3
MOVQ XMM1,XMM0
PUNPCKLBW XMM1,XMM3
@@ -2804,38 +2824,38 @@
PUNPCKHDQ XMM1,XMM1
PSLLW XMM2,8
PMULLW XMM0,XMM1
- PADDW XMM2,[RAX]
+ PADDW XMM2,[EAX]
PADDW XMM2,XMM0
PSRLW XMM2,8
PACKUSWB XMM2,XMM3
MOVD EAX, XMM2
-@2: MOV [R11],EAX
+@2: MOV [EDI],EAX
-@3: ADD R10,4
- ADD R11,4
+@3: ADD ESI,4
+ ADD EDI,4
- DEC R8D
+ DEC ECX
JNZ @1
+ POP EDI
+ POP ESI
+
@4: RET
{$ENDIF}
-{$IFDEF TARGET_X86}
- // EAX <- Src
+{$IFDEF TARGET_X64}
+ // ECX <- Src
// EDX <- Dst
- // ECX <- Count
+ // R8D <- Count
- TEST ECX,ECX
+ TEST R8D,R8D
JZ @4
- PUSH ESI
- PUSH EDI
+ MOV R10,RCX
+ MOV R11,RDX
- MOV ESI,EAX
- MOV EDI,EDX
-
-@1: MOV EAX,[ESI]
+@1: MOV EAX,[R10]
TEST EAX,$FF000000
JZ @3
CMP EAX,$FF000000
@@ -2843,9 +2863,9 @@
MOVD XMM0,EAX
PXOR XMM3,XMM3
- MOVD XMM2,[EDI]
+ MOVD XMM2,[R11]
PUNPCKLBW XMM0,XMM3
- MOV EAX,bias_ptr
+ MOV RAX,bias_ptr
PUNPCKLBW XMM2,XMM3
MOVQ XMM1,XMM0
PUNPCKLBW XMM1,XMM3
@@ -2854,31 +2874,28 @@
PUNPCKHDQ XMM1,XMM1
PSLLW XMM2,8
PMULLW XMM0,XMM1
- PADDW XMM2,[EAX]
+ PADDW XMM2,[RAX]
PADDW XMM2,XMM0
PSRLW XMM2,8
PACKUSWB XMM2,XMM3
MOVD EAX, XMM2
-@2: MOV [EDI],EAX
+@2: MOV [R11],EAX
-@3: ADD ESI,4
- ADD EDI,4
+@3: ADD R10,4
+ ADD R11,4
- DEC ECX
+ DEC R8D
JNZ @1
- POP EDI
- POP ESI
-
@4: RET
{$ENDIF}
end;
-{$IFDEF TARGET_X86}
procedure BlendLineEx_SSE2(Src, Dst: PColor32; Count: Integer; M: TColor32);
asm
+{$IFDEF TARGET_X86}
// EAX <- Src
// EDX <- Dst
// ECX <- Count
@@ -2904,7 +2921,7 @@
INC EBX // 255:256 range bias
IMUL EBX,EDX
SHR EBX,8
- JZ @3 // complete transparency, proceed to next point
+ JZ @3 // complete transparency, proceed to next point
// blend
PXOR XMM0,XMM0
@@ -2937,40 +2954,65 @@
POP EDI
POP ESI
@4:
-end;
-
{$ENDIF}
-function CombineReg_SSE2(X, Y, W: TColor32): TColor32;
-asm
{$IFDEF TARGET_X64}
- // ECX - Color X
- // EDX - Color Y
- // R8 - Weight of X [0..255]
- // Result := W * (X - Y) + Y
+ // RCX <- Src
+ // RDX <- Dst
+ // R8D <- Count
+ // R9D <- M
+ // test the counter for zero or negativity
+ TEST R8D,R8D
+ JS @4
+ TEST R9D,R9D
+ JZ @4
+
+ MOV R10,RCX // R10 <- Src
+
+ // loop start
+@1: MOV ECX,[R10]
+ TEST ECX,$FF000000
+ JZ @3 // complete transparency, proceed to next point
+ MOV EAX,ECX
+ SHR EAX,24
+ INC EAX // 255:256 range bias
+ IMUL EAX,R9D
+ SHR EAX,8
+ JZ @3 // complete transparency, proceed to next point
+
+ // blend
+ PXOR XMM0,XMM0
MOVD XMM1,ECX
- PXOR XMM0,XMM0
- SHL R8,4
-
- MOVD XMM2,EDX
+ SHL EAX,4
+ MOVD XMM2,[RDX]
PUNPCKLBW XMM1,XMM0
PUNPCKLBW XMM2,XMM0
-
- ADD R8,alpha_ptr
-
+ ADD RAX,alpha_ptr
PSUBW XMM1,XMM2
- PMULLW XMM1,[R8]
+ PMULLW XMM1,[RAX]
PSLLW XMM2,8
-
- MOV R8,bias_ptr
-
- PADDW XMM2,[R8]
+ MOV RAX,bias_ptr
+ PADDW XMM2,[RAX]
PADDW XMM1,XMM2
PSRLW XMM1,8
PACKUSWB XMM1,XMM0
- MOVD EAX,XMM1
+ MOVD ECX,XMM1
+
+@2: MOV [RDX],ECX
+
+@3: ADD R10,4
+ ADD RDX,4
+
+ // loop end
+ DEC R8D
+ JNZ @1
+@4:
{$ENDIF}
+end;
+
+function CombineReg_SSE2(X, Y, W: TColor32): TColor32;
+asm
{$IFDEF TARGET_X86}
// EAX - Color X
// EDX - Color Y
@@ -2999,27 +3041,18 @@
PACKUSWB XMM1,XMM0
MOVD EAX,XMM1
{$ENDIF}
-end;
-procedure CombineMem_SSE2(F: TColor32; var B: TColor32; W: TColor32);
-asm
{$IFDEF TARGET_X64}
// ECX - Color X
- // [RDX] - Color Y
+ // EDX - Color Y
// R8 - Weight of X [0..255]
// Result := W * (X - Y) + Y
- TEST R8,R8 // Set flags for R8
- JZ @1 // W = 0 ? => Result := EDX
- CMP R8,$FF
- JZ @2
-
MOVD XMM1,ECX
PXOR XMM0,XMM0
-
SHL R8,4
- MOVD XMM2,[RDX]
+ MOVD XMM2,EDX
PUNPCKLBW XMM1,XMM0
PUNPCKLBW XMM2,XMM0
@@ -3029,18 +3062,18 @@
PMULLW XMM1,[R8]
PSLLW XMM2,8
- MOV RAX,bias_ptr
+ MOV R8,bias_ptr
- PADDW XMM2,[RAX]
+ PADDW XMM2,[R8]
PADDW XMM1,XMM2
PSRLW XMM1,8
PACKUSWB XMM1,XMM0
- MOVD [RDX],XMM1
+ MOVD EAX,XMM1
+{$ENDIF}
+end;
-@1: RET
-
-@2: MOV [RDX],ECX
-{$ENDIF}
+procedure CombineMem_SSE2(F: TColor32; var B: TColor32; W: TColor32);
+asm
{$IFDEF TARGET_X86}
// EAX - Color X
// [EDX] - Color Y
@@ -3079,12 +3112,51 @@
@2: MOV [EDX],EAX
{$ENDIF}
+
+{$IFDEF TARGET_X64}
+ // ECX - Color X
+ // [RDX] - Color Y
+ // R8 - Weight of X [0..255]
+ // Result := W * (X - Y) + Y
+
+ TEST R8,R8 // Set flags for R8
+ JZ @1 // W = 0 ? => leave [RDX] unchanged
+ CMP R8,$FF
+ JZ @2
+
+ MOVD XMM1,ECX
+ PXOR XMM0,XMM0
+
+ SHL R8,4
+
+ MOVD XMM2,[RDX]
+ PUNPCKLBW XMM1,XMM0
+ PUNPCKLBW XMM2,XMM0
+
+ ADD R8,alpha_ptr
+
+ PSUBW XMM1,XMM2
+ PMULLW XMM1,[R8]
+ PSLLW XMM2,8
+
+ MOV RAX,bias_ptr
+
+ PADDW XMM2,[RAX]
+ PADDW XMM1,XMM2
+ PSRLW XMM1,8
+ PACKUSWB XMM1,XMM0
+ MOVD [RDX],XMM1
+
+@1: RET
+
+@2: MOV [RDX],ECX
+{$ENDIF}
end;
-{$IFDEF TARGET_X86}
procedure CombineLine_SSE2(Src, Dst: PColor32; Count: Integer; W: TColor32);
asm
+{$IFDEF TARGET_X86}
// EAX <- Src
// EDX <- Dst
// ECX <- Count
@@ -3139,9 +3211,58 @@
@4: SHL ECX,2
CALL Move
POP EBX
-end;
+{$ENDIF}
+{$IFDEF TARGET_X64}
+ // RCX <- Src
+ // RDX <- Dst
+ // R8D <- Count, R9D <- W
+
+ // Result := W * (X - Y) + Y
+
+ TEST R8D,R8D
+ JZ @2
+
+ TEST R9D,R9D
+ JZ @2
+
+ CMP R9D,$FF
+ JZ @3
+
+ SHL R9D,4
+ ADD R9,alpha_ptr
+ MOVQ XMM3,[R9]
+ MOV R9,bias_ptr
+ MOVQ XMM4,[R9]
+
+@1: MOVD XMM1,[RCX]
+ PXOR XMM0,XMM0
+ MOVD XMM2,[RDX]
+ PUNPCKLBW XMM1,XMM0
+ PUNPCKLBW XMM2,XMM0
+
+ PSUBW XMM1,XMM2
+ PMULLW XMM1,XMM3
+ PSLLW XMM2,8
+
+ PADDW XMM2,XMM4
+ PADDW XMM1,XMM2
+ PSRLW XMM1,8
+ PACKUSWB XMM1,XMM0
+ MOVD [RDX],XMM1
+
+ ADD RCX,4
+ ADD RDX,4
+
+ DEC R8D
+ JNZ @1
+
+@2: RET
+
+@3: SHL R8D,2
+ CALL Move
{$ENDIF}
+end;
function MergeReg_SSE2(F, B: TColor32): TColor32;
asm
@@ -3171,14 +3292,13 @@
(1 - Fa) * (1 - Ba) = 1 - Fa - Ba + Fa * Ba = (1 - Ra)
}
-{$IFDEF TARGET_X64}
- TEST ECX,$FF000000 // foreground completely transparent =>
- JZ @1 // result = background
- MOV EAX,ECX // EAX <- Fa
- CMP EAX,$FF000000 // foreground completely opaque =>
- JNC @2 // result = foreground
- TEST EDX,$FF000000 // background completely transparent =>
- JZ @2 // result = foreground
+{$IFDEF TARGET_X86}
+ TEST EAX,$FF000000 // foreground completely transparent =>
+ JZ @1 // result = background
+ CMP EAX,$FF000000 // foreground completely opaque =>
+ JNC @2 // result = foreground
+ TEST EDX,$FF000000 // background completely transparent =>
+ JZ @2 // result = foreground
PXOR XMM7,XMM7 // XMM7 <- 00
MOVD XMM0,EAX // XMM0 <- Fa Fr Fg Fb
@@ -3213,14 +3333,16 @@
@1: MOV EAX,EDX
@2:
{$ENDIF}
-{$IFDEF TARGET_X86}
- TEST EAX,$FF000000 // foreground completely transparent =>
- JZ @1 // result = background
- CMP EAX,$FF000000 // foreground completely opaque =>
- JNC @2 // result = foreground
- TEST EDX,$FF000000 // background completely transparent =>
- JZ @2 // result = foreground
+{$IFDEF TARGET_X64}
+ TEST ECX,$FF000000 // foreground completely transparent =>
+ JZ @1 // result = background
+ MOV EAX,ECX // EAX <- Fa
+ CMP EAX,$FF000000 // foreground completely opaque =>
+ JNC @2 // result = foreground
+ TEST EDX,$FF000000 // background completely transparent =>
+ JZ @2 // result = foreground
+
PXOR XMM7,XMM7 // XMM7 <- 00
MOVD XMM0,EAX // XMM0 <- Fa Fr Fg Fb
SHR EAX,24 // EAX <- Fa
@@ -3263,9 +3385,9 @@
function LightenReg_SSE2(C: TColor32; Amount: Integer): TColor32;
asm
-{$IFDEF TARGET_X64}
+{$IFDEF TARGET_X86}
MOVD XMM1, EDX
- MOVD XMM0, ECX
+ MOVD XMM0, EAX
MOVQ XMM2, XMM1
PSLLW XMM2, 8
POR XMM2, XMM1
@@ -3274,9 +3396,10 @@
PADDUSB XMM0, XMM1
MOVD EAX, XMM0
{$ENDIF}
-{$IFDEF TARGET_X86}
+
+{$IFDEF TARGET_X64}
MOVD XMM1, EDX
- MOVD XMM0, EAX
+ MOVD XMM0, ECX
MOVQ XMM2, XMM1
PSLLW XMM2, 8
POR XMM2, XMM1
@@ -3292,14 +3415,15 @@
function ColorAdd_SSE2(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
- MOVD XMM0,ECX
+{$IFDEF TARGET_X86}
+ MOVD XMM0,EAX
MOVD XMM1,EDX
PADDUSB XMM0,XMM1
MOVD EAX,XMM0
{$ENDIF}
-{$IFDEF TARGET_X86}
- MOVD XMM0,EAX
+
+{$IFDEF TARGET_X64}
+ MOVD XMM0,ECX
MOVD XMM1,EDX
PADDUSB XMM0,XMM1
MOVD EAX,XMM0
@@ -3308,14 +3432,15 @@
function ColorSub_SSE2(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
- MOVD XMM0,ECX
+{$IFDEF TARGET_X86}
+ MOVD XMM0,EAX
MOVD XMM1,EDX
PSUBUSB XMM0,XMM1
MOVD EAX,XMM0
{$ENDIF}
-{$IFDEF TARGET_X86}
- MOVD XMM0,EAX
+
+{$IFDEF TARGET_X64}
+ MOVD XMM0,ECX
MOVD XMM1,EDX
PSUBUSB XMM0,XMM1
MOVD EAX,XMM0
@@ -3324,9 +3449,9 @@
function ColorModulate_SSE2(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
+{$IFDEF TARGET_X86}
PXOR XMM2,XMM2
- MOVD XMM0,ECX
+ MOVD XMM0,EAX
PUNPCKLBW XMM0,XMM2
MOVD XMM1,EDX
PUNPCKLBW XMM1,XMM2
@@ -3335,9 +3460,10 @@
PACKUSWB XMM0,XMM2
MOVD EAX,XMM0
{$ENDIF}
-{$IFDEF TARGET_X86}
+
+{$IFDEF TARGET_X64}
PXOR XMM2,XMM2
- MOVD XMM0,EAX
+ MOVD XMM0,ECX
PUNPCKLBW XMM0,XMM2
MOVD XMM1,EDX
PUNPCKLBW XMM1,XMM2
@@ -3350,14 +3476,15 @@
function ColorMax_SSE2(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
- MOVD XMM0,ECX
+{$IFDEF TARGET_X86}
+ MOVD XMM0,EAX
MOVD XMM1,EDX
PMAXUB XMM0,XMM1
MOVD EAX,XMM0
{$ENDIF}
-{$IFDEF TARGET_X86}
- MOVD XMM0,EAX
+
+{$IFDEF TARGET_X64}
+ MOVD XMM0,ECX
MOVD XMM1,EDX
PMAXUB XMM0,XMM1
MOVD EAX,XMM0
@@ -3366,14 +3493,15 @@
function ColorMin_SSE2(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
- MOVD XMM0,ECX
+{$IFDEF TARGET_X86}
+ MOVD XMM0,EAX
MOVD XMM1,EDX
PMINUB XMM0,XMM1
MOVD EAX,XMM0
{$ENDIF}
-{$IFDEF TARGET_X86}
- MOVD XMM0,EAX
+
+{$IFDEF TARGET_X64}
+ MOVD XMM0,ECX
MOVD XMM1,EDX
PMINUB XMM0,XMM1
MOVD EAX,XMM0
@@ -3382,8 +3510,8 @@
function ColorDifference_SSE2(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
- MOVD XMM0,ECX
+{$IFDEF TARGET_X86}
+ MOVD XMM0,EAX
MOVD XMM1,EDX
MOVQ XMM2,XMM0
PSUBUSB XMM0,XMM1
@@ -3391,8 +3519,9 @@
POR XMM0,XMM1
MOVD EAX,XMM0
{$ENDIF}
-{$IFDEF TARGET_X86}
- MOVD XMM0,EAX
+
+{$IFDEF TARGET_X64}
+ MOVD XMM0,ECX
MOVD XMM1,EDX
MOVQ XMM2,XMM0
PSUBUSB XMM0,XMM1
@@ -3404,9 +3533,9 @@
function ColorExclusion_SSE2(C1, C2: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
+{$IFDEF TARGET_X86}
PXOR XMM2,XMM2
- MOVD XMM0,ECX
+ MOVD XMM0,EAX
PUNPCKLBW XMM0,XMM2
MOVD XMM1,EDX
PUNPCKLBW XMM1,XMM2
@@ -3418,9 +3547,10 @@
PACKUSWB XMM0,XMM2
MOVD EAX,XMM0
{$ENDIF}
-{$IFDEF TARGET_X86}
+
+{$IFDEF TARGET_X64}
PXOR XMM2,XMM2
- MOVD XMM0,EAX
+ MOVD XMM0,ECX
PUNPCKLBW XMM0,XMM2
MOVD XMM1,EDX
PUNPCKLBW XMM1,XMM2
@@ -3436,24 +3566,25 @@
function ColorScale_SSE2(C, W: TColor32): TColor32;
asm
-{$IFDEF TARGET_X64}
+{$IFDEF TARGET_X86}
PXOR XMM2,XMM2
- SHL RDX,4
- MOVD XMM0,ECX
+ SHL EDX,4
+ MOVD XMM0,EAX
PUNPCKLBW XMM0,XMM2
- ADD RDX,alpha_ptr
- PMULLW XMM0,[RDX]
+ ADD EDX,alpha_ptr
+ PMULLW XMM0,[EDX]
PSRLW XMM0,8
PACKUSWB XMM0,XMM2
MOVD EAX,XMM0
{$ENDIF}
-{$IFDEF TARGET_X86}
+
+{$IFDEF TARGET_X64}
PXOR XMM2,XMM2
- SHL EDX,4
- MOVD XMM0,EAX
+ SHL RDX,4
+ MOVD XMM0,ECX
PUNPCKLBW XMM0,XMM2
- ADD EDX,alpha_ptr
- PMULLW XMM0,[EDX]
+ ADD RDX,alpha_ptr
+ PMULLW XMM0,[RDX]
PSRLW XMM0,8
PACKUSWB XMM0,XMM2
MOVD EAX,XMM0
@@ -3606,6 +3737,7 @@
BlendRegistry.Add(FID_BLENDREGEX, @BlendRegEx_ASM, []);
BlendRegistry.Add(FID_BLENDMEMEX, @BlendMemEx_ASM, []);
BlendRegistry.Add(FID_BLENDLINE, @BlendLine_ASM, []);
+ BlendRegistry.Add(FID_LIGHTEN, @LightenReg_ASM, []);
{$IFNDEF OMIT_MMX}
BlendRegistry.Add(FID_EMMS, @EMMS_MMX, [ciMMX]);
BlendRegistry.Add(FID_COMBINEREG, @CombineReg_MMX, [ciMMX]);
@@ -3631,16 +3763,13 @@
BlendRegistry.Add(FID_MERGEREG, @MergeReg_SSE2, [ciSSE2]);
BlendRegistry.Add(FID_COMBINEREG, @CombineReg_SSE2, [ciSSE2]);
BlendRegistry.Add(FID_COMBINEMEM, @CombineMem_SSE2, [ciSSE2]);
+ BlendRegistry.Add(FID_COMBINELINE, @CombineLine_SSE2, [ciSSE2]);
BlendRegistry.Add(FID_BLENDREG, @BlendReg_SSE2, [ciSSE2]);
BlendRegistry.Add(FID_BLENDMEM, @BlendMem_SSE2, [ciSSE2]);
BlendRegistry.Add(FID_BLENDMEMEX, @BlendMemEx_SSE2, [ciSSE2]);
BlendRegistry.Add(FID_BLENDLINE, @BlendLine_SSE2, [ciSSE2]);
+ BlendRegistry.Add(FID_BLENDLINEEX, @BlendLineEx_SSE2, [ciSSE2]);
BlendRegistry.Add(FID_BLENDREGEX, @BlendRegEx_SSE2, [ciSSE2]);
-{$IFNDEF TARGET_x64}
- BlendRegistry.Add(FID_MERGEREG, @MergeReg_ASM, []);
- BlendRegistry.Add(FID_COMBINELINE, @CombineLine_SSE2, [ciSSE2]);
- BlendRegistry.Add(FID_BLENDLINEEX, @BlendLineEx_SSE2, [ciSSE2]);
-{$ENDIF}
BlendRegistry.Add(FID_COLORMAX, @ColorMax_SSE2, [ciSSE2]);
BlendRegistry.Add(FID_COLORMIN, @ColorMin_SSE2, [ciSSE2]);
BlendRegistry.Add(FID_COLORADD, @ColorAdd_SSE2, [ciSSE2]);
@@ -3649,9 +3778,11 @@
BlendRegistry.Add(FID_COLORDIFFERENCE, @ColorDifference_SSE2, [ciSSE2]);
BlendRegistry.Add(FID_COLOREXCLUSION, @ColorExclusion_SSE2, [ciSSE2]);
BlendRegistry.Add(FID_COLORSCALE, @ColorScale_SSE2, [ciSSE2]);
- BlendRegistry.Add(FID_LIGHTEN, @LightenReg_ASM, []);
BlendRegistry.Add(FID_LIGHTEN, @LightenReg_SSE2, [ciSSE]);
+{$IFNDEF TARGET_x64}
+ BlendRegistry.Add(FID_MERGEREG, @MergeReg_ASM, []);
{$ENDIF}
+{$ENDIF}
BlendRegistry.RebindAll;
end;
Modified: trunk/Source/GR32_LowLevel.pas
===================================================================
--- trunk/Source/GR32_LowLevel.pas 2012-03-25 10:05:08 UTC (rev 1514)
+++ trunk/Source/GR32_LowLevel.pas 2012-03-25 18:29:35 UTC (rev 1515)
@@ -203,6 +...
[truncated message content] |