The compiler assumes that global variables cannot be modified by functions, thus "optimise" the code resolving tests and expressions on the basis of the value assigned before calling the function.
In this code, n0 is global and can be modified by AddSat1()
{
n1=0;
MyObj = &MyObject[0];
for (ii=0;ii<MaxNumObj;ii++)
{
if (MyObj->Status != 0)
{
for (jj=0;jj<MyObj->NumSprt;jj++)
{
y = MyObj->y + MyObj->dy[jj];
x = MyObj->x + MyObj->dx[jj];
p = MyObj->pat[jj];
c = MyObj->col[jj];
AddSat1(); // SAT1
}
}
MyObj++;
}
if (n1<32) {
VDP_Poke_16K(216, SatAddr1+4*n1);
}
}
The compiler assume that n1 stays 0 when evaluating the last IF (n1<32) and generates this ASM
_SatUpdate::
push ix
ld ix,#0
add ix,sp
dec sp
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:130: if (Sprt_flip == 1)
ld a, (_Sprt_flip+0)
dec a
jp NZ,00114$
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:132: n1=0;
ld hl, #_n1
ld (hl), #0x00
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:134: MyObj = &MyObject[0];
ld iy, #_MyObj
ld 0 (iy), #<(_MyObject)
ld 1 (iy), #>(_MyObject)
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:135: for (ii=0;ii<MaxNumObj;ii++)
ld hl, #_ii
ld (hl), #0x00
00119$:
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:137: if (MyObj->Status != 0)
ld hl, (_MyObj)
ld a, (hl)
or a, a
jr Z, 00103$
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:139: for (jj=0;jj<MyObj->NumSprt;jj++)
ld hl, #_jj
ld (hl), #0x00
00117$:
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:137: if (MyObj->Status != 0)
ld bc, (_MyObj)
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:139: for (jj=0;jj<MyObj->NumSprt;jj++)
ld e, c
ld d, b
ld hl, #7
add hl, de
ld e, (hl)
ld a, (_jj+0)
sub a, e
jr NC, 00103$
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:141: y = MyObj->y + MyObj->dy[jj];
ld l, c
; spillPairReg hl
; spillPairReg hl
ld h, b
; spillPairReg hl
; spillPairReg hl
inc hl
inc hl
inc hl
ld e, (hl)
inc hl
ld d, (hl)
ld hl, #0x0014
add hl, bc
ld a, (_jj+0)
ld -1 (ix), a
ld a, l
add a, -1 (ix)
ld l, a
; spillPairReg hl
; spillPairReg hl
jr NC, 00209$
inc h
00209$:
ld l, (hl)
; spillPairReg hl
ld h, #0x00
; spillPairReg hl
; spillPairReg hl
add hl, de
ld (_y), hl
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:142: x = MyObj->x + MyObj->dx[jj];
ld l, c
; spillPairReg hl
; spillPairReg hl
ld h, b
; spillPairReg hl
; spillPairReg hl
inc hl
ld e, (hl)
inc hl
ld d, (hl)
ld hl, #0x0008
add hl, bc
ld a, l
add a, -1 (ix)
ld l, a
; spillPairReg hl
; spillPairReg hl
jr NC, 00210$
inc h
00210$:
ld l, (hl)
; spillPairReg hl
ld h, #0x00
; spillPairReg hl
; spillPairReg hl
add hl, de
ld (_x), hl
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:143: p = MyObj->pat[jj];
ld hl, #0x0020
add hl, bc
ld e, -1 (ix)
ld d, #0x00
add hl, de
ld a, (hl)
ld (_p+0), a
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:144: c = MyObj->col[jj];
ld hl, #0x002c
add hl, bc
ld e, -1 (ix)
ld d, #0x00
add hl, de
ld a, (hl)
ld (_c+0), a
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:146: AddSat1(); // SAT1
call _AddSat1
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:139: for (jj=0;jj<MyObj->NumSprt;jj++)
ld hl, #_jj
inc (hl)
jp 00117$
00103$:
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:149: MyObj++;
ld hl, #_MyObj
ld a, (hl)
add a, #0x38
ld (hl), a
jr NC, 00211$
inc hl
inc (hl)
00211$:
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:135: for (ii=0;ii<MaxNumObj;ii++)
ld hl, #_ii
inc (hl)
ld a, (_ii+0)
sub a, #0x10
jp C, 00119$
;./after_burner\AFTER_BURNER_SPRITE_SPLIT.C:152: VDP_Poke_16K(216, SatAddr1+4*n1);
ld de, #0x1e00
ld a, #0xd8
call _VDP_Poke_16K
jp 00126$
00114$:
As you can see n1 has been evaluated a 0 both in the comparison and in the expression 4*n1
On which version of SDCC did you see the bug (sdcc --version)? On which host OS? Which command line options were used to compile?
Can you provide a compileable code sample to reproduce the issue?
Even if not, maybe there are still a few things to try (also helpful if you can provide code, but with code I could try them myself):
The workaround is to remove the function and inject its code directly, in order to make SDCC see that it is modifying n1
sdcc --version
SDCC : mcs51/z80/z180/r2k/r2ka/r3ka/sm83/tlcs90/ez80_z80/z80n/r800/ds390/pic16/pic14/TININative/ds400/hc08/s08/stm8/pdk13/pdk14/pdk15/mos6502/mos65c02 TD- 4.4.0 #14620 (MINGW64)
published under GNU General Public License (GPL)
I'm using Windows 11
This should be the command line (I'm using a script from msxgl)
Compiling ./template.c using SDCC C compiler...
Execute: C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\tools/sdcc/bin/sdcc -c -mz80 -DTARGET=TARGET_ROM_ASCII16_4096K -DMSX_VERSION=MSX_TR -IC:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/ -IC:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\engine/src -IC:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\engine/content -IC:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\tools/ --opt-code-speed -DAPPSIGN ./template.c -o C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/out/
Exit code: 0
This is the C file that includes the file where the bug happens
Last edit: Ragozini Arturo 2024-04-12
Look at the void SatUpdate(void) in this file
The "broken" n1 is just after the call the the function FAKE()
Diff:
I was able to reproduce the issue using SDCC 4.4.1 #14789 on Debian GNU/Linux on amd64.
From your code, I created the attached regression test. It looks like all SDCC ports are affected (I tried z80, stm8, mcs51).
--nogcseis a workaround.P.S.: An improved version of the attached test is in [r14790] (disabled until someone fixes the bug).
Related
Commit: [r14790]
Last edit: Philipp Klaus Krause 2024-04-12
Diff:
Playing with that, I get the minimal code (using just u8) which still passes 0 to the minimal VDP_Poke_16K this:
Clearly passes 0 to _VDP_Poke_16K:
The call to fake() has to be in a block inside of a block, and one of the these blocks has to be a
forloop: I can't getifinside of anifto generate a problem, and removing theif ( bad )line while keeping theforloop would also produce the correct code.Thanks. That simplification is now in [r14795].
Related
Commit: [r14795]
Welcome, I'm glad when it helps.
And, unsurprisingly, what I immediately see as the additional improvement point in that generated code: IY is used there "for no reason" IMO, even if these IX and IY registers should always be the last to be used as they produce both bigger and slower code (worse in both dimensions, so clear-cut).