Menu

#906 Inefficient z80 code

None
open
nobody
None
5
2024-04-23
2024-04-13
No

The compiler generates redundant code and neglects some 16-bit instructions.
I have this struct:

typedef struct {
    u8 Status;
    i16 x,y;
    i8 vx,vy;
    u8 NumSprt;
    u8 dx[MaxNumSprtPerObj],dy[MaxNumSprtPerObj];
    u8 pat[MaxNumSprtPerObj],col[MaxNumSprtPerObj];
} Object;


extern Object MyObject[];

and this segment of code, where MyObj is a pointer to the above struct, and x, y, p and c are global variables (p and c are char; x and y are stored as 16-bit values in the compiled code)

        MyObj = &MyObject[0];
        for (ii=0;ii<MaxNumObj;ii++)
        {
            if (MyObj->Status != 0) 
            {
                for (jj=0;jj<MyObj->NumSprt;jj++) 
                {
                    y = MyObj->y + MyObj->dy[jj];
                    x = MyObj->x + MyObj->dx[jj];
                    p = MyObj->pat[jj];
                    c = MyObj->col[jj];

The compiled code is this:

;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:138: MyObj = &MyObject[0];
    ld  iy, #_MyObj
    ld  0 (iy), #<(_MyObject)
    ld  1 (iy), #>(_MyObject)
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:139: for (ii=0;ii<MaxNumObj;ii++)
    ld  hl, #_ii
    ld  (hl), #0x00
00143$:
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:141: if (MyObj->Status != 0) 
    ld  hl, (_MyObj)
    ld  a, (hl)
    or  a, a
    jp  Z, 00115$
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:143: for (jj=0;jj<MyObj->NumSprt;jj++) 
    ld  hl, #_jj
    ld  (hl), #0x00
00141$:
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:141: if (MyObj->Status != 0) 
    ld  bc, (_MyObj)
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:143: for (jj=0;jj<MyObj->NumSprt;jj++) 
    ld  e, c
    ld  d, b
    ld  hl, #7
    add hl, de
    ld  e, (hl)
    ld  a, (_jj+0)
    sub a, e
    jp  NC, 00115$
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:145: y = MyObj->y + MyObj->dy[jj];
    ld  l, c
;   spillPairReg hl
;   spillPairReg hl
    ld  h, b
;   spillPairReg hl
;   spillPairReg hl
    inc hl
    inc hl
    inc hl
    ld  e, (hl)
    inc hl
    ld  d, (hl)
    ld  hl, #0x0014
    add hl, bc
    ld  a, (_jj+0)
    ld  -1 (ix), a
    ld  a, l
    add a, -1 (ix)
    ld  l, a
;   spillPairReg hl
;   spillPairReg hl
    jr  NC, 00341$
    inc h
00341$:
    ld  l, (hl)
;   spillPairReg hl
    ld  h, #0x00
;   spillPairReg hl
;   spillPairReg hl
    add hl, de
    ld  (_y), hl
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:146: x = MyObj->x + MyObj->dx[jj];
    ld  l, c
;   spillPairReg hl
;   spillPairReg hl
    ld  h, b
;   spillPairReg hl
;   spillPairReg hl
    inc hl
    ld  e, (hl)
    inc hl
    ld  d, (hl)
    ld  hl, #0x0008
    add hl, bc
    ld  a, l
    add a, -1 (ix)
    ld  l, a
;   spillPairReg hl
;   spillPairReg hl
    jr  NC, 00342$
    inc h
00342$:
    ld  l, (hl)
;   spillPairReg hl
    ld  h, #0x00
;   spillPairReg hl
;   spillPairReg hl
    add hl, de
    ld  (_x), hl
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:147: p = MyObj->pat[jj];
    ld  hl, #0x0020
    add hl, bc
    ld  e, -1 (ix)
    ld  d, #0x00
    add hl, de
    ld  a, (hl)
    ld  (_p+0), a
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:148: c = MyObj->col[jj];
    ld  hl, #0x002c
    add hl, bc
    ld  e, -1 (ix)
    ld  d, #0x00
    add hl, de
    ld  a, (hl)
    ld  (_c+0), a

The problems, mainly in the loop, are many. This section

    ld  bc, (_MyObj)
;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:143: for (jj=0;jj<MyObj->NumSprt;jj++) 
    ld  e, c
    ld  d, b
    ld  hl, #7
    add hl, de
    ld  e, (hl)
    ld  a, (_jj+0)
    sub a, e
    jp  NC, 00115$

can be replaced by

        ld  hl, (_MyObj)
        ld  de,7
        add hl,de
        ld  a, (_jj+0)
        cp (hl)
        jp  NC, 00115$

while this section:

;C:\Users\PC\OneDrive\Documenti\MSXgl-1.0.0\projects\template_msx2/template_s255_b1.c:145: y = MyObj->y + MyObj->dy[jj];
    ld  l, c
    ld  h, b
    inc hl
    inc hl
    inc hl
    ld  e, (hl)
    inc hl
    ld  d, (hl)
    ld  hl, #0x0014
    add hl, bc
    ld  a, (_jj+0)
    ld  -1 (ix), a
    ld  a, l
    add a, -1 (ix)
    ld  l, a
    jr  NC, 00341$
    inc h
00341$:
    ld  l, (hl)
    ld  h, #0x00
    add hl, de
    ld  (_y), hl

can be replaced by:

        ld de,3-7   // use that HL is _MyObj+7 and that we need _MyObj+3
        add hl,de   
        ld  e, (hl) // read Y in de
        inc hl
        ld  d, (hl) // hl = _MyObj+4
        ld bc,0x14-4    // use that HL is  _MyObj+4 and we need  _MyObj+0x14
        add hl,bc   
        ld  a, (_jj+0)
        ld  c,a
        ld  b,0
        add hl,bc       // hl = &MyObj->dy[jj]
        ld  l,(hl)
        ld  h,0         // hl = MyObj->dy[jj]
        add hl,de
        ld  (_y), hl

What I see is that the compiler does not take advantage of ld r,(hl), ld (hl),r and cp (hl). It does not use 16-bit add/sub here, preferring 8-bit additions even when this means resorting to stack space through (ix). It also fails to reuse the previous value of a pointer to a structure field by adding the offset between fields...

Similar problems occur when adding dx[jj].

Discussion

  • Ragozini Arturo

    Ragozini Arturo - 2024-04-13

    My C source

     
  • Ragozini Arturo

    Ragozini Arturo - 2024-04-13

    Its corresponding ASM

     
  • Ragozini Arturo

    Ragozini Arturo - 2024-04-13

    Note that with bc and de, this code

            ld  a, (_jj+0)
            ld  c,a
            ld  b,0
    

    is slightly faster than

            ld  bc, (_jj+0)
            ld  b,0
    

    While with hl, this is always faster than going through A:

            ld  hl, (_jj+0)
            ld  h,0
    
     
  • Philipp Klaus Krause

    In the last 15 years for the z80 port in sdcc, I think substantially more effort has gone into improving the handling of local variables than into improving the handling of global ones. So it is no surprise to me that someone who uses global variables a lot runs into quite a few potential improvements.

     
    👍
    2
  • Philipp Klaus Krause

    • summary: Bad z80 code --> Inefficient z80 code
    • Group: -->
     

Log in to post a comment.

MongoDB Logo MongoDB