The compiler generates redundant code and neglects some 16-bit instructions
I have this struct:
/* Object record as declared in the report. The byte offsets noted below are
   the ones the generated Z80 code quoted later in this post uses to reach
   each field through the MyObj pointer. */
typedef struct {
u8 Status; /* offset 0; the outer loop skips the object when this is 0 */
i16 x,y; /* x at offset 1, y at offset 3; each is read as a low/high byte pair */
i8 vx,vy; /* presumably offsets 5 and 6 - not accessed in the quoted code */
u8 NumSprt; /* offset 7; upper bound of the inner jj loop */
u8 dx[MaxNumSprtPerObj],dy[MaxNumSprtPerObj]; /* dx starts at offset 0x08, dy at 0x14 */
u8 pat[MaxNumSprtPerObj],col[MaxNumSprtPerObj]; /* pat starts at offset 0x20, col at 0x2c */
} Object;
extern Object MyObject[];
and this segment of code, where MyObj is a pointer to the above struct, and x, y, p and c are global char variables:
MyObj = &MyObject[0];
for (ii=0;ii<MaxNumObj;ii++)
{
if (MyObj->Status != 0)
{
for (jj=0;jj<MyObj->NumSprt;jj++)
{
y = MyObj->y + MyObj->dy[jj];
x = MyObj->x + MyObj->dx[jj];
p = MyObj->pat[jj];
c = MyObj->col[jj];
The compiled code is this:
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:138: MyObj = &MyObject[0];
ld iy, #_MyObj
ld 0 (iy), #<(_MyObject)
ld 1 (iy), #>(_MyObject)
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:139: for (ii=0;ii<MaxNumObj;ii++)
ld hl, #_ii
ld (hl), #0x00
00143$:
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:141: if (MyObj->Status != 0)
ld hl, (_MyObj)
ld a, (hl)
or a, a
jp Z, 00115$
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:143: for (jj=0;jj<MyObj->NumSprt;jj++)
ld hl, #_jj
ld (hl), #0x00
00141$:
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:141: if (MyObj->Status != 0)
ld bc, (_MyObj)
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:143: for (jj=0;jj<MyObj->NumSprt;jj++)
ld e, c
ld d, b
ld hl, #7
add hl, de
ld e, (hl)
ld a, (_jj+0)
sub a, e
jp NC, 00115$
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:145: y = MyObj->y + MyObj->dy[jj];
ld l, c
; spillPairReg hl
; spillPairReg hl
ld h, b
; spillPairReg hl
; spillPairReg hl
inc hl
inc hl
inc hl
ld e, (hl)
inc hl
ld d, (hl)
ld hl, #0x0014
add hl, bc
ld a, (_jj+0)
ld -1 (ix), a
ld a, l
add a, -1 (ix)
ld l, a
; spillPairReg hl
; spillPairReg hl
jr NC, 00341$
inc h
00341$:
ld l, (hl)
; spillPairReg hl
ld h, #0x00
; spillPairReg hl
; spillPairReg hl
add hl, de
ld (_y), hl
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:146: x = MyObj->x + MyObj->dx[jj];
ld l, c
; spillPairReg hl
; spillPairReg hl
ld h, b
; spillPairReg hl
; spillPairReg hl
inc hl
ld e, (hl)
inc hl
ld d, (hl)
ld hl, #0x0008
add hl, bc
ld a, l
add a, -1 (ix)
ld l, a
; spillPairReg hl
; spillPairReg hl
jr NC, 00342$
inc h
00342$:
ld l, (hl)
; spillPairReg hl
ld h, #0x00
; spillPairReg hl
; spillPairReg hl
add hl, de
ld (_x), hl
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:147: p = MyObj->pat[jj];
ld hl, #0x0020
add hl, bc
ld e, -1 (ix)
ld d, #0x00
add hl, de
ld a, (hl)
ld (_p+0), a
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:148: c = MyObj->col[jj];
ld hl, #0x002c
add hl, bc
ld e, -1 (ix)
ld d, #0x00
add hl, de
ld a, (hl)
ld (_c+0), a
There are many problems, mainly in the inner loop. This section:
ld bc, (_MyObj)
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:143: for (jj=0;jj<MyObj->NumSprt;jj++)
ld e, c
ld d, b
ld hl, #7
add hl, de
ld e, (hl)
ld a, (_jj+0)
sub a, e
jp NC, 00115$
can be replaced by
; Hand-written replacement for the "jj < MyObj->NumSprt" loop test.
; Leaves hl = MyObj+7 (address of NumSprt) for the code that follows.
ld hl, (_MyObj)	; hl = MyObj
ld de, #7	; offset of NumSprt inside Object
add hl, de	; hl = &MyObj->NumSprt
ld a, (_jj+0)	; a = jj
cp (hl)	; compare jj with NumSprt directly in memory, no extra register load
jp NC, 00115$	; no carry means jj >= NumSprt: leave the inner loop
while this section:
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:145: y = MyObj->y + MyObj->dy[jj];
ld l, c
ld h, b
inc hl
inc hl
inc hl
ld e, (hl)
inc hl
ld d, (hl)
ld hl, #0x0014
add hl, bc
ld a, (_jj+0)
ld -1 (ix), a
ld a, l
add a, -1 (ix)
ld l, a
jr NC, 00341$
inc h
00341$:
ld l, (hl)
ld h, #0x00
add hl, de
ld (_y), hl
can be replaced by:
; Hand-written replacement for "y = MyObj->y + MyObj->dy[jj]".
; Entry: hl = MyObj+7 (left there by the preceding loop test).
; Each field is reached by adding the distance from the previous field
; instead of reloading the base pointer.
ld de, #3-7	; hl is MyObj+7; step back to MyObj+3 (field y)
add hl, de
ld e, (hl)	; low byte of MyObj->y
inc hl
ld d, (hl)	; de = MyObj->y, hl = MyObj+4
ld bc, #0x14-4	; hl is MyObj+4; advance to MyObj+0x14 (start of dy[])
add hl, bc
ld a, (_jj+0)
ld c, a
ld b, #0x00	; bc = jj zero-extended to 16 bits
add hl, bc	; hl = &MyObj->dy[jj]
ld l, (hl)
ld h, #0x00	; hl = MyObj->dy[jj], zero-extended
add hl, de	; hl = MyObj->y + MyObj->dy[jj]
ld (_y), hl
What I see is that the compiler does not use ld r,(hl), ld (hl),r or cp (hl). It does not use 16-bit add/sub; it prefers to perform 8-bit additions even when this forces it to resort to stack space through (ix). It also fails to reuse the previous value of a pointer to a structure field by adding the offset between fields...
Similar problems occur when adding dx[jj].
My C source
Its corresponding ASM
Note that with bc and de, this code
is slightly faster than
While with hl this is always faster than using A
In the last 15 years for the z80 port in sdcc, I think substantially more effort has gone into improving the handling of local variables than into improving the handling of global ones. So it is no surprise to me that someone who uses global variables a lot runs into quite a few potential improvements.