Small Device C Compiler (SDCC) / Discussion / Help: #pragma callee

I have a few questions about callee_saves for the experts. I've been searching all over for this information but my Google-fu is weak. Most search results talk about callee saves concept in general or just links to various versions of the sdcc manual, which doesn't seem to explain the following points.

I understand that the called function must save/restore the registers, but how is this actually done in practice? I eventually settled on inline assembly. There's no way in C to manipulate registers, as far as I know. Maybe this is blindingly obvious to some, but maybe a hint for the clueless in the manual. (e.g. "... additional code in inline assembly need to be manually inserted...") would be helpful. The manual contains other nuggets of wisdom, so I don't think this would be out of line. Maybe an example of it being used would be good.
How does one determine a priori which registers need to be saved? I ended up looking at the .asm/.lst of the calling function after the fact to determine which registers are being saved and applied that to the called function when switching on callee-saves. Can I rely on the same registers being used by the called function across compilations/versions of the compiler? (maybe something like "Look in the compiled assembly to determine the registers used by the called function." in the manual would help.)
For the life of me, I could not get the #pragma callee_saves to work. I have tried 3.6.0 and 3.8.0 on Mac OS X and 3.6.0 on Windows. (the command line --callee-saves seems to work.)

My functions are:

//base SPI routines.

#include<mcs51/8051.h>

#ifndef SPI_H
#define SPI_H

#define MOSI P1_5
#define MISO P1_6
#define SCK  P1_7
#define CS   P1_4


#pragma callee_saves SPI_select, SPI_deselect, SPI_outb, SPI_inb
void SPI_select() {
    SCK=0;
    CS=0;
    return;
} 

void SPI_deselect() {
    CS=1;
    return;
}

void SPI_outb(unsigned char data) {
    unsigned char bitpos;
__asm__ (" push ar7\n push ar6\n ");
    for (bitpos=0;bitpos<8;bitpos++) {
        SCK=0;
        if(data & 0x80) {
            MOSI=1;
        } else {
            MOSI=0;
        }
        SCK=1;
        data = data<<1;
    }
__asm__ (" pop ar6\n pop ar7\n ");
return;
}

unsigned char SPI_inb() {
unsigned char bitpos;
unsigned char data;
unsigned char pinState;
__asm__ (" push ar7\n push ar6\n push ar5\n ");
data = 0;
    for (bitpos=0;bitpos<8;bitpos++) {
        data = data<<1;
        SCK=0;
       // Get state of MISO configured as INPUT:
        pinState = MISO ? 1 : 0;
        //delay
        pinState = MISO ? 1 : 0;
        data |= pinState;
        SCK=1;
    }
    //this will generate a warning.  I have to do my own return value to dpl 
    //so that I can pop ar7.
    __asm__ (" pop ar5\n pop ar6\n mov dpl,r7\n pop ar7\n");    
//return data;
}
#endif

It's used like this:

#include "spi.h"
 <snip>
///////////////////// S25_SectorErase  /////////////////////////
int S25_SectorErase(unsigned int sector) {
   SPI_select();
   SPI_outb(ins_WREN);
   SPI_deselect();
   SPI_select();
   SPI_outb(ins_4SE); 
   SPI_outb((sector>>8) & 0xff);
   SPI_outb(sector & 0xff);
   SPI_outb(0);
   SPI_outb(0);
   SPI_deselect();
   return S25_WaitWIP();
}

Looking at the compiled assembly, I still see some push/pop around the above functions:

;------------------------------------------------------------
;Allocation info for local variables in function 'S25_SectorErase'
;------------------------------------------------------------
;sector                    Allocated to registers r6 r7 
;------------------------------------------------------------
;   s25flash.h:296: int S25_SectorErase(unsigned int sector) {
;   -----------------------------------------
;    function S25_SectorErase
;   -----------------------------------------
_S25_SectorErase:
    mov r6,dpl
    mov r7,dph
;   s25flash.h:297: SPI_select();
    lcall   _SPI_select
;   s25flash.h:298: SPI_outb(ins_WREN);
    mov dpl,#0x06
    push    ar7
    push    ar6
    lcall   _SPI_outb
;   s25flash.h:299: SPI_deselect();
    lcall   _SPI_deselect
;   s25flash.h:300: SPI_select();
    lcall   _SPI_select
;   s25flash.h:301: SPI_outb(ins_4SE); 
    mov dpl,#0xdc
    lcall   _SPI_outb
    pop ar6
    pop ar7
;   s25flash.h:302: SPI_outb((sector>>8) & 0xff);
    mov ar5,r7
    mov dpl,r5
    push    ar7
    push    ar6
    lcall   _SPI_outb
    pop ar6
    pop ar7
;   s25flash.h:303: SPI_outb(sector & 0xff);
    mov dpl,r6
    lcall   _SPI_outb
;   s25flash.h:304: SPI_outb(0);
    mov dpl,#0x00
    lcall   _SPI_outb
;   s25flash.h:305: SPI_outb(0);
    mov dpl,#0x00
    lcall   _SPI_outb
;   s25flash.h:306: SPI_deselect();
    lcall   _SPI_deselect
;   s25flash.h:307: return S25_WaitWIP();
    ljmp    _S25_WaitWIP

However, if I include --callee-saves SPI_inb,SPI_outb,SPI_select,SPI_deselect in the command line, I get the expected output

------------------------------------------------------------
;sector                    Allocated to registers r6 r7 
;------------------------------------------------------------
;   s25flash.h:296: int S25_SectorErase(unsigned int sector) {
;   -----------------------------------------
;    function S25_SectorErase
;   -----------------------------------------
_S25_SectorErase:
    mov r6,dpl
    mov r7,dph
;   s25flash.h:297: SPI_select();
    lcall   _SPI_select
;   s25flash.h:298: SPI_outb(ins_WREN);
    mov dpl,#0x06
    lcall   _SPI_outb
;   s25flash.h:299: SPI_deselect();
    lcall   _SPI_deselect
;   s25flash.h:300: SPI_select();
    lcall   _SPI_select
;   s25flash.h:301: SPI_outb(ins_4SE); 
    mov dpl,#0xdc
    lcall   _SPI_outb
;   s25flash.h:302: SPI_outb((sector>>8) & 0xff);
    mov ar5,r7
    mov dpl,r5
    lcall   _SPI_outb
;   s25flash.h:303: SPI_outb(sector & 0xff);
    mov dpl,r6
    lcall   _SPI_outb
;   s25flash.h:304: SPI_outb(0);
    mov dpl,#0x00
    lcall   _SPI_outb
;   s25flash.h:305: SPI_outb(0);
    mov dpl,#0x00
    lcall   _SPI_outb
;   s25flash.h:306: SPI_deselect();
    lcall   _SPI_deselect
;   s25flash.h:307: return S25_WaitWIP();
    ljmp    _S25_WaitWIP

I want the pragma to work so that the callee_saves stays with the functions and I don't have to think about it when I use the code in other projects. I've tryed callee_saves, callee-saves, with and without spaces between the function names. Any clues as to what I'm doing wrong?

This is more a comment than a question. When trying to restore a register that is being used to return a value to the caller, I have to intentionally leave off the return value in the return statement and include the mov to DPL in the inline assembly. This generates a "must return value" warning. Is there a more correct way to do it? If this is the correct way, maybe another hint in the manual would be helpful.

Unrelated commentary: Going from 3.6.0 to 3.8.0 seems to have reduced the code size ~100 bytes. (cool!) Working implementation of calle-saves cut another 200. (double cool.) I'm still just under 10KB. I want to fit in an 8K EEPROM but it's looking grim. (uncool) (I was just under 7KB before I had to turn on --stack-auto due to running out of DSEG, after which it immediately blew up to ~10K). Maybe I like the 8051 just because it's such a challenge. :)

Thank you in advance.

Maarten Brock - 2019-02-17

You should not have to save/restore the registers yourself unless you use inline assembly, mark the function naked or write the whole function in assembly. The compiler should not save/restore registers around a call to a callee-saves function. And it should save/restore registers inside a callee-saves function that the function uses. In short you should not need to use inline assembly for it.

Of course you have to tell the compiler not only to implement a function as callee-saves but also tell all callers through a prototype (in spi.h) that it is callee-saves.

Having said all that, callee-saves is something that is not regression tested AFAIK and is even likely to disappear in the future.

When you run out of DSEG you can also place a few variables in idata. Due to lack of indexing instructions in idata accessing data on the stack is cumbersome for an 8051, hence the increase in code size. But accessing fixed addresses in idata is easier.

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

Chase Tsang - 2019-02-18

Thank you for the explanation. I was lead to believe I needed to add the saving/restoring code in the "callee-saves" function by the text in the manual which read:

The compiler will not save registers when calling these functions, extra code need to be manually inserted at the entry and exit for these functions to save and restore the regis- ters used by these functions

The generated assembly seems to also be what I want.

However, upon re-reading your explanation, I think I see now that the callee-saves should be placed on the big function that calls the small function, not only on the small functions that is called by the big functions. (The wording of the manual makes it sound like it is only on the callee) I thought I actually tried that once but still wasn't getting the results I wanted. On the other hand, if it might disappear in the future, I think I'll just give up on trying to use it.

Thank you also for the tip about idata. I'll see what I can squeeze out of it.

After I switched to --stack-auto, the code compiles, but got wierd runtime behavior. I guessed I was running out of stack altogether. I probably added too many things at once and now I have to probably backtrack and figure out what's pushing it over the edge.

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

Maarten Brock - 2019-02-18

For starters, the pragma is weird. Normally we use a special keyword to change the calling convention of specific functions (e.g. __reentrant).

You should not mark the caller as callee-saves, only the callee.

I don't think I have ever tested the pragma. I may have tested the --callee-saves once or twice a long time ago, but I don't remember if it was without issues. And as said, it is not regression tested. There already are too many permutations to cover. So, callee-saves might be full of bugs. If you want you can open a bug report, but I doubt it will get much attention in the near future.

The part in the manual may require some updating.

Apart from placing stuff in idata you can also try pdata. Like idata pdata can be accessed by two registers (r0, r1) instead of just the one dptr for xdata. Pdata is the default memory for the medium memory model.

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

Chase Tsang - 2019-02-18

Oh, ok. Perhaps then, the pragma is simply broken, since I get what appears to be the intended behavior (per the generated assembly) if I use the --callee-saves command line and manually insert the save/restore code in the callee as directed in the manual.

It is a useful feature, as it saved a non-trivial number of bytes in the resulting code (though admittedly, I may get more benefit from actually optimizing other parts of the code.) However, I understand it is not a priority.

I hadn't fully considered using pdata as I already have some portions of the code using xdata and I hadn't quite wrapped my head around how they would interact.

Thank you again for your help. I think I understand the situation now.

If you would like to refer to this comment somewhere else in this project, copy and paste the following link:

#pragma callee_saves usage

The Small Device C Compiler (SDCC), targeting 8-bit architectures

Forums

Help

#pragma callee_saves usage

#pragma callee_saves usage

The Small Device C Compiler (SDCC), targeting 8-bit architectures

Forums

Help

#pragma callee_saves usage document.SUBSCRIPTION_OPTIONS = { "thing": "topic", "subscribed": false, "url": "subscribe", "icon": { "css": "fa fa-envelope-o" } };

#pragma callee_saves usage