Menu

#665 Build fails with LTO

v1.0 (example)
open
nobody
None
5
2024-04-01
2024-03-25
No

I tried to build with the following *FLAGS to optimize the build: -flto=4 -Werror=odr -Werror=lto-type-mismatch -Werror=strict-aliasing

Note the -Werror=* flags are used to help detect cases where the compiler tries to optimize by assuming UB cannot exist in the source code -- if it does exist, ordinarily the code would be miscompiled, and this says to make the miscompilation a fatal error.

I got this error:

make[3]: Entering directory '/var/tmp/portage/sci-electronics/ngspice-42/work/ngspice-42-binaries/src/xspice/icm'
for cm in spice2poly digital analog xtradev xtraevt table ; do \
    make cm=$cm $cm/$cm.cm \
    || exit 1; \
done
make[4]: Entering directory '/var/tmp/portage/sci-electronics/ngspice-42/work/ngspice-42-binaries/src/xspice/icm'
x86_64-pc-linux-gnu-gcc -march=native -fstack-protector-all -O2 -pipe -fdiagnostics-color=always -frecord-gcc-switches -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 -fstack-clash-protection -flto=4 -Werror=odr -Werror=lto-type-mismatch -Werror=strict-aliasing  -Wformat -Werror=format-security -Werror=implicit-function-declaration -Werror=implicit-int -Werror=int-conversion -Werror=incompatible-pointer-types -fopenmp -fPIC -fvisibility=hidden -shared spice2poly/dlmain.o dstring.o spice2poly/icm_spice2poly/cfunc.o spice2poly/icm_spice2poly/ifspec.o -lm -o spice2poly/spice2poly.cm
spice2poly/cmextrn.h:1:19: error: type of spice2poly_info does not match original declaration [-Werror=lto-type-mismatch]
    1 | extern  SPICEdev  spice2poly_info;
      |                   ^
spice2poly/icm_spice2poly/ifspec.c:135:10: note: spice2poly_info was previously declared here
  135 | SPICEdev spice2poly_info = {
      |          ^
spice2poly/icm_spice2poly/ifspec.c:135:10: note: code may be misoptimized unless -fno-strict-aliasing is used
lto1: some warnings being treated as errors
lto-wrapper: fatal error: x86_64-pc-linux-gnu-gcc returned 1 exit status
compilation terminated.
/usr/lib/gcc/x86_64-pc-linux-gnu/13/../../../../x86_64-pc-linux-gnu/bin/ld: error: lto-wrapper failed
collect2: error: ld returned 1 exit status
make[4]: *** [GNUmakefile:114: spice2poly/spice2poly.cm] Error 1

Previously reported downstream: https://bugs.gentoo.org/862513
Attached is a full build log.

1 Attachments

Discussion

  • Dietmar Warning

    Dietmar Warning - 2024-03-25

    Thanks for the report.
    Do you have a recommendation how to solve the potential problem in the code?

     
  • Giles Atkinson

    Giles Atkinson - 2024-03-25

    If the function name is changed in ifspec.ifs, even by one character, the compilation succeeds. But then the problem occurs with several of the digital code models, There should be a pattern to the behaviour, but I have not found it.

     

    Last edit: Giles Atkinson 2024-03-25
    • marcel hendrix

      marcel hendrix - 2024-03-25

      Isn't the compiler complaining about the missing 'extern'? I also notice an extra space between '..dev' and 'spice2..', had a strange problem with that myself a few days ago.

       
  • Holger Vogt

    Holger Vogt - 2024-03-25

    When I look at \src\include\ngspice\devdefs.h, I find this:

    typedef struct SPICEdev {
        IFdevice DEVpublic;
    
        int (*DEVparam)(int,IFvalue*,GENinstance*,IFvalue *);  
            /* routine to input a parameter to a device instance */
    
    ...
    
        int (*DEVnoise)(int, int, GENmodel*,CKTcircuit*, Ndata *, double *);
        /* noise routine */
        int (*DEVsoaCheck)(CKTcircuit*,GENmodel*);
        /* subroutine to call on soa check */
    #ifdef CIDER    
        void (*DEVdump)(GENmodel *, CKTcircuit *);
        void (*DEVacct)(GENmodel *, CKTcircuit *, FILE *);
            /* routines to report device internals
             * now used only by cider numerical devices
             */
    #endif
        int *DEVinstSize;    /* size of an instance */
        int *DEVmodSize;     /* size of a model */
    
    #ifdef KLU
        int (*DEVbindCSC)(GENmodel *, CKTcircuit *);
            /* routine to convert Sparse linked list to Real CSC array */
        int (*DEVbindCSCComplex)(GENmodel *, CKTcircuit *);
            /* routine to convert Real CSC array to Complex CSC array */
        int (*DEVbindCSCComplexToReal)(GENmodel *, CKTcircuit *);
            /* routine to convert Complex CSC array to Real CSC array */
    #endif
    
    } SPICEdev;  /* instance of structure for each possible type of device */
    

    When I then look at \visualc\xspice\icm\spice2poly\icm_spice2poly\icm_spice2poly-ifspec.c,
    there is

    SPICEdev spice2poly_info = {
        .DEVpublic = {
    
    ...
    
        .DEVnoise = NULL,
        .DEVsoaCheck = NULL,
        .DEVinstSize = &val_sizeofMIFinstance,
        .DEVmodSize = &val_sizeofMIFmodel,
    
    #ifdef CIDER
        .DEVdump = NULL,
        .DEVacct = NULL,
    #endif
    
    #ifdef KLU
        .DEVbindCSC = MIFbindCSC,
        .DEVbindCSCComplex = MIFbindCSCComplex,
        .DEVbindCSCComplexToReal = MIFbindCSCComplexToReal,
    #endif
    };
    

    The position of the CIDER thing is different. Can this be the root cause?

     
  • Giles Atkinson

    Giles Atkinson - 2024-03-25

    I have a fix, but I do not think it should be adopted:

    diff --git a/src/xspice/icm/dlmain.c b/src/xspice/icm/dlmain.c
    index bde19fcd2..7403875e8 100644
    --- a/src/xspice/icm/dlmain.c
    +++ b/src/xspice/icm/dlmain.c
    @@ -14,15 +14,15 @@
     #include  <string.h>
    
     #include "ngspice/cpextern.h"
    -#include "ngspice/devdefs.h"
     #include "ngspice/dstring.h"
     #include "ngspice/dllitf.h"
     #include "ngspice/evtudn.h"
     #include "ngspice/inpdefs.h"
     #include "ngspice/inertial.h"
    -#include "cmextrn.h"
     #include "udnextrn.h"
    
    +typedef struct SPICEdev SPICEdev;
    +#include "cmextrn.h"
    

    An obvious question: how does that work? I found it after I decided that the warning message (treated as error) is probably the result of a compiler bug. What does the warning say? It lacks any detail, but seems to mean that the typedef SPICEdev has a different meaning in dlmain.c and the ifspec.c file output from cmpp. That is ridiculous: SPICEdev is defined once in devdefs.h, there are no conditionals in the definition and the header is included into both C files.

    If there is a gcc bug, how might it work? Perhaps the compiler "thinks" there are two different devdefs.h header files - it is confused by compiling source from different directory levels. It turns out that dlmain.c makes no use of the SPICEdev variables: they are placed in a global array to be found by dlsym() after the code-model library has been dynamically loaded. There are no other references,. So devdef.h can be removed from dlmain.c with a little ugly fix, and then the compiler can not be confused. It works!

    Some supporting evidence that this is a gcc bug: according to the gcc manual, -flto is not some tweak to code generation like most -fxxxx options. It goes deep, so deep that the format of the generated .o files is very different when it is used. That is easily confirmed with readelf. Also the option seems much less mature than gcc's traditional functions. The only other time I saw it used is in compiling for microcontrollers, where optimising for size is important. So it seems the -flto option is not much used with software for bigger computers, and likely not mature. A web search for the error message in this case turns up only variations of this bug report for other software. All come from Gentoo.

    Finally, the error report is sensitive to the function name in ifspec.ifs. That smells bad, and the ngspice part seems so simple that the bad smell must be coming from the compiler.

     
  • Arsen Arsenović

    An obvious question: how does that work? I found it after I decided that the warning message (treated as error) is probably the result of a compiler bug. What does the warning say? It lacks any detail, but seems to mean that the typedef SPICEdev has a different meaning in dlmain.c and the ifspec.c file output from cmpp. That is ridiculous: SPICEdev is defined once in devdefs.h, there are no conditionals in the definition and the header is included into both C files.

    Possibly one of the types in the struct that changes. A composite type depends on all the types in the composite.

    Indeed, that appears to be the case (thanks, abidiff!):

      [C] 'SPICEdev ucm_d_to_real_info' was changed at ngss2.c:19042:1:
        size of symbol changed from 336 to 392
        type of variable changed:
          underlying type 'struct SPICEdev' at ngss1.c:6892:1 changed:
            type size changed from 2688 to 3136 (in bits)
            33 data member changes:
              type of 'IFdevice DEVpublic' changed:
                underlying type 'struct IFdevice' at ngss1.c:3858:1 changed:
                  type size changed from 640 to 1088 (in bits)
                  7 data member insertions:
                    'void (Mif_Private_t*)* cm_func', at offset 576 (in bits) at ngss2.c:16254:1
                    'int num_conn', at offset 640 (in bits) at ngss2.c:16256:1
                    'Mif_Conn_Info_t* conn', at offset 704 (in bits) at ngss2.c:16257:1
                    'int num_param', at offset 768 (in bits) at ngss2.c:16259:1
                    'Mif_Param_Info_t* param', at offset 832 (in bits) at ngss2.c:16260:1
                    'int num_inst_var', at offset 896 (in bits) at ngss2.c:16262:1
                    'Mif_Inst_Var_Info_t* inst_var', at offset 960 (in bits) at ngss2.c:16263:1
                  1 data member change:
                    'int flags' offset changed from 576 to 1024 (in bits) (by +448 bits)
    

    IFdevices are wildly different between the src/xspice/icm/xtraevt/dlmain.i dlmain TU and src/xspice/icm/xtraevt/d_to_real/ifspec.c TU. Preprocessing both reveals the difference.

    The reason the hack works is because the differing type never gets declared, the fwdecl simply gets merged with the correct type.

    If there is a gcc bug, how might it work? Perhaps the compiler "thinks" there are two different devdefs.h header files - it is confused by compiling source from different directory levels. It turns out that dlmain.c makes no use of the SPICEdev variables: they are placed in a global array to be found by dlsym() after the code-model library has been dynamically loaded.

    File names are not relevant here. GCC just compares the types. They differ. Redeclaring the same file twice (i.e. using header files) does not cause this error.

    Some supporting evidence that this is a gcc bug: according to the gcc manual, -flto is not some tweak to code generation like most -fxxxx options. It goes deep, so deep that the format of the generated .o files is very different when it is used. That is easily confirmed with readelf.

    This is indeed the case. Despite that, LTO is perfectly (assuming no bugs) standards compliant. It happens to often detect ODR-violations and similar issues (like the type mismatches above), simply due to being a massively global analysis pass.

    Also the option seems much less mature than gcc's traditional functions. The only other time I saw it used is in compiling for microcontrollers, where optimising for size is important.

    Not at all, it is widely deployed (many distros, not just Gentoo) build with it. Some examples:

    https://fedoraproject.org/wiki/LTOByDefault
    https://wiki.ubuntu.com/ToolChain/LTO

    A web search for the error message in this case turns up only variations of this bug report for other software. All come from Gentoo.

    This is because we test quite publicly.

     

    Last edit: Arsen Arsenović 2024-03-25
  • Holger Vogt

    Holger Vogt - 2024-03-25

    In \src\include\ngspice\ifsim.h, in the definition of struct IFdevice, there is an additional

    #ifdef OSDI
        const void *registry_entry;
    #endif
    

    after
    int flags;
    which is not there in

    SPICEdev spice2poly_info = {
        .DEVpublic = {
        ...
    

    and again also the sequence differs (position of int flags; before or after the XSPICE entries (does that matter?).

    A test might be to compile without --enable-osdi.

     

    Last edit: Holger Vogt 2024-03-25
  • Giles Atkinson

    Giles Atkinson - 2024-03-26

    Thank you for the corrections Arsen Arsenović. As usually happens, the hypothesis that the compiler is at fault is quite wrong, but this time has led to correct fix:

    diff --git a/src/xspice/icm/dlmain.c b/src/xspice/icm/dlmain.c
    index bde19fcd2..401800192 100644
    --- a/src/xspice/icm/dlmain.c
    +++ b/src/xspice/icm/dlmain.c
    @@ -13,6 +13,7 @@
     #include  <stdlib.h>
     #include  <string.h>
    
    +#include "ngspice/config.h"
     #include "ngspice/cpextern.h"
     #include "ngspice/devdefs.h"
    

    When dlmain.c can see the configured options the layouts are the same.

     
    🎉
    1
  • Giles Atkinson

    Giles Atkinson - 2024-04-01

    But now:

    Author: Holger Vogt <holger.vogt@uni-due.de>
    Date:   Fri Mar 29 17:27:43 2024 +0100
    
        Bug 665: enable compiling the code models with link time optimization.
        Fix provided by Giles Atkinson.
        Still the build of ngspice fails (tested with Cygwin) at the linking stage.
    

    In earlier testing, my builds were successful with this change. But now, on Linux, I see what I hope are the same errors:

    make[2]: Entering directory '/home/ga/ngspice/pre-master/release/src'
      CCLD     ngspice
    frontend/../../../src/include/ngspice/cpextern.h:123:13: error: type of 'out_is\
    atty' does not match original declaration [-Werror=lto-type-mismatch]
      123 | extern bool out_isatty;
          |             ^
    frontend/../../../src/frontend/terminal.c:44:6: note: type '_Bool' should match\
     type 'bool'
       44 | bool out_isatty = TRUE;
          |      ^
    frontend/../../../src/frontend/terminal.c:44:6: note: 'out_isatty' was previous\
    ly declared here
    frontend/../../../src/frontend/terminal.c:44:6: note: code may be misoptimized \
    unless '-fno-strict-aliasing' is used
    
            ...
    

    I do not know what has changed and unfortunately did not record the build options that hid these errors. This needs more testing, but appears to be a fix:

    diff --git a/src/include/ngspice/bool.h b/src/include/ngspice/bool.h
    index c5f70e331..202d3b602 100644
    --- a/src/include/ngspice/bool.h
    +++ b/src/include/ngspice/bool.h
    @@ -1,9 +1,8 @@
     #ifndef ngspice_BOOL_H
     #define ngspice_BOOL_H
    
    -//typedef unsigned char bool;
     #ifndef __cplusplus
    -typedef int bool;
    +#include <stdbool.h>
     #endif
    
     typedef int BOOL;
    

    The LTO option is also reporting a lot of "...may be used uninitialized" warnings that may be worth looking at.

     
  • Holger Vogt

    Holger Vogt - 2024-04-01

    Your solution will not compile with MS Windows. I do have

    diff --git a/src/include/ngspice/bool.h b/src/include/ngspice/bool.h
    index c5f70e331..7f398d105 100644
    --- a/src/include/ngspice/bool.h
    +++ b/src/include/ngspice/bool.h
    @@ -1,20 +1,20 @@
     #ifndef ngspice_BOOL_H
     #define ngspice_BOOL_H
    
    -//typedef unsigned char bool;
    +#if defined (__MINGW32__) || defined (_MSC_VER)
     #ifndef __cplusplus
     typedef int bool;
     #endif
    +#else
    +#include <stdbool.h>
    +#endif
    
     typedef int BOOL;
    
    -
     #define BOOLEAN int
     #define TRUE  1
     #define FALSE 0
     #define NO    0
     #define YES   1
    
    -
    -
     #endif
    
     
  • Giles Atkinson

    Giles Atkinson - 2024-04-01

    That also works on my machine (Debian Linux). G.

     

Log in to post a comment.