Menu

#2098 MinGW startup code pollutes the namespace by calling opendir and readdir

WSL
closed
None
Bug
fixed
IINR_-_Include_In_Next_Release
True
2017-12-18
2013-10-09
No

MinGW runtime 4.x calls the dirent functions, opendir/readdir/closedir, in its startup code. This pollutes the namespace of, and breaks, application programs that have incompatible or entirely unrelated functions by that name, even if the application didn't include the dirent.h header. For example, a program that defines its own readdir will almost surely break the globbing of command-line arguments, and could even crash.

Please make the startup code call __opendir, __readdir, etc. instead, as symbols that begin with two underscores are "reserved", and cannot legitimately appear in an application.

Related

Issues: #2106
Issues: #2111
Issues: #2112
Issues: #2113

Discussion

1 2 > >> (Page 1 of 2)
  • Earnie Boyd

    Earnie Boyd - 2013-10-09
    • status: unread --> assigned
    • assigned_to: Earnie Boyd
     
  • Earnie Boyd

    Earnie Boyd - 2013-10-09

    I think __mingw_opendir, etc would be a better name.

    Care to supply a patch against the 4.1-dev branch of mingw-org-wsl repository? If not this may wait until the 5.0 release.

     
  • Eli Zaretskii

    Eli Zaretskii - 2013-10-09

    Anything that starts with 2 underscores is OK.

    Where can I find instructions for checking out the repository?

     
  • Keith Marshall

    Keith Marshall - 2013-10-10

    So, your own project is fundamentally broken, (because you implement your own replacements for standard POSIX functions, and you neglect to sequester them in your own private (pseudo) namespace). This is your bug, yet you expect us to facilitate a kludge for you, by moving our system core implementation of a POSIX compatibility API, into some other arbitrary namespace? Hardly seems like a solid justification, IMO.

    That said, I guess the fact that this is Windows, which does not support POSIX features as standard, may provide some minimal justification for your request. With that in mind, perhaps it wouldn't be too difficult to adopt something like (untested, patch attached):

    diff --git a/include/dirent.h b/include/dirent.h
    index f965d0d..9aac5b5 100644
    --- a/include/dirent.h
    +++ b/include/dirent.h
    @@ -62,12 +62,29 @@ struct dirent
      */
     typedef union __dirstream_t DIR;
    
    -DIR* __cdecl __MINGW_NOTHROW opendir (const char*);
    -struct dirent* __cdecl __MINGW_NOTHROW readdir (DIR*);
    -int __cdecl __MINGW_NOTHROW closedir (DIR*);
    -void __cdecl __MINGW_NOTHROW rewinddir (DIR*);
    -long __cdecl __MINGW_NOTHROW telldir (DIR*);
    -void __cdecl __MINGW_NOTHROW seekdir (DIR*, long);
    +DIR* __cdecl __MINGW_NOTHROW __mingw_opendir (const char*);
    +_CRTALIAS DIR* __cdecl __MINGW_NOTHROW opendir (const char *__dirname)
    +{ return __mingw_opendir (__dirname); }
    +
    +struct dirent* __cdecl __MINGW_NOTHROW __mingw_readdir (DIR*);
    +_CRTALIAS struct dirent* __cdecl __MINGW_NOTHROW __mingw_readdir (DIR *__dir)
    +{ return __mingw_readdir (__dir); }
    +
    +int __cdecl __MINGW_NOTHROW __mingw_closedir (DIR*);
    +_CRTALIAS int __cdecl __MINGW_NOTHROW closedir (DIR *__dir)
    +{ return __mingw_closedir (__dir); }
    +
    +void __cdecl __MINGW_NOTHROW __mingw_rewinddir (DIR*);
    +_CRTALIAS void __cdecl __MINGW_NOTHROW rewinddir (DIR *__dir)
    +{ return __mingw_rewinddir (__dir); }
    +
    +long __cdecl __MINGW_NOTHROW __mingw_telldir (DIR*);
    +_CRTALIAS long __cdecl __MINGW_NOTHROW telldir (DIR *__dir)
    +{ return __mingw_telldir (__dir); }
    +
    +void __cdecl __MINGW_NOTHROW __mingw_seekdir (DIR*, long);
    +_CRTALIAS void __cdecl __MINGW_NOTHROW seekdir (DIR *__dir, long __loc)
    +{ return __mingw_seekdir (__dir, __loc); }
    
     /* wide char versions */
    @@ -100,13 +117,29 @@ struct _wdirent
      */
     typedef union __wdirstream_t _WDIR;
    
    -_WDIR* __cdecl __MINGW_NOTHROW _wopendir (const wchar_t*);
    -struct _wdirent*  __cdecl __MINGW_NOTHROW _wreaddir (_WDIR*);
    -int __cdecl __MINGW_NOTHROW _wclosedir (_WDIR*);
    -void __cdecl __MINGW_NOTHROW _wrewinddir (_WDIR*);
    -long __cdecl __MINGW_NOTHROW _wtelldir (_WDIR*);
    -void __cdecl __MINGW_NOTHROW _wseekdir (_WDIR*, long);
    +_WDIR* __cdecl __MINGW_NOTHROW __mingw__wopendir (const wchar_t*);
    +_CRTALIAS _WDIR* __cdecl __MINGW_NOTHROW _wopendir (const wchar_t *__dirname)
    +{ return __mingw__wopendir (__dirname); }
    +
    +struct _wdirent*  __cdecl __MINGW_NOTHROW __mingw__wreaddir (_WDIR*);
    +_CRTALIAS struct _wdirent*  __cdecl __MINGW_NOTHROW _wreaddir (_WDIR *__dir)
    +{ return __mingw__wreaddir (__dir); }
    +
    +int __cdecl __MINGW_NOTHROW __mingw__wclosedir (_WDIR*);
    +_CRTALIAS int __cdecl __MINGW_NOTHROW _wclosedir (_WDIR *__dir)
    +{ return __mingw__wclosedir (__dir); }
    +
    +void __cdecl __MINGW_NOTHROW __mingw__wrewinddir (_WDIR*);
    +_CRTALIAS void __cdecl __MINGW_NOTHROW _wrewinddir (_WDIR *__dir)
    +{ return __mingw__wrewinddir (__dirname); }
    +
    +long __cdecl __MINGW_NOTHROW __mingw__wtelldir (_WDIR*);
    +_CRTALIAS long __cdecl __MINGW_NOTHROW _wtelldir (_WDIR *__dir)
    +{ return __mingw__wtelldir (__dirname); }
    
    +void __cdecl __MINGW_NOTHROW __mingw__wseekdir (_WDIR*, long);
    +_CRTALIAS void __cdecl __MINGW_NOTHROW _wseekdir (_WDIR *__dir, long __loc)
    +{ return __mingw__wseekdir (__dir, __loc); }
    
     #ifdef __cplusplus
     }
    diff --git a/src/libcrt/tchar/dirent.c b/src/libcrt/tchar/dirent.c
    index 8d7f162..46c7702 100644
    --- a/src/libcrt/tchar/dirent.c
    +++ b/src/libcrt/tchar/dirent.c
    @@ -31,6 +31,9 @@
     #include <windows.h>
     #include <tchar.h>
    
    +#define __mingw_impl__(__function__)  __mingw_token_prefix__(__function__)
    +#define __mingw_token_prefix__(__suffix__)  __mingw_##__suffix__
    +
     #define DIRENT_RETURN_NOTHING
     #define DIRENT_REJECT( chk, err, rtn ) \
       do { if( chk ){ errno = (err); return rtn; }} while(0)
    @@ -145,7 +148,7 @@ union __wdirstream_t
      *
      */
     _TDIR *
    -_topendir( const _TCHAR *path_name )
    +__mingw_impl__(_topendir)( const _TCHAR *path_name )
     {
       _TDIR *nd;
       _TCHAR abs_path[MAX_PATH];
    @@ -266,7 +269,7 @@ _topendir( const _TCHAR *path_name )
      * on the next available entry, (if any), in the "directory stream".
      */
     struct _tdirent *
    -_treaddir( _TDIR *dirp )
    +__mingw_impl__(_treaddir)( _TDIR *dirp )
     {
       /* Check for a valid DIR stream reference; (we can't really
        * be certain until we try to read from it, except in the case
    @@ -346,7 +349,7 @@ _treaddir( _TDIR *dirp )
      *
      */
     int
    -_tclosedir( _TDIR * dirp )
    +__mingw_impl__(_tclosedir)( _TDIR * dirp )
     {
       /* Attempting to reference a directory stream via a NULL pointer
        * would cause a segmentation fault; evade this.  Since NULL can
    @@ -379,7 +382,7 @@ _tclosedir( _TDIR * dirp )
      *
      */
     void
    -_trewinddir( _TDIR * dirp )
    +__mingw_impl__(_trewinddir)( _TDIR * dirp )
     {
       /* This is an XSI extension to POSIX, which specifies no formal
        * error conditions; we will continue to check for and evade the
    @@ -413,7 +416,7 @@ _trewinddir( _TDIR * dirp )
      *
      */
     long
    -_ttelldir( _TDIR * dirp )
    +__mingw_impl__(_ttelldir)( _TDIR * dirp )
     {
       /* This too is a POSIX-XSI extension, with no mandatory error
        * conditions.  Once again, evade a potential segmentation fault
    @@ -443,7 +446,7 @@ _ttelldir( _TDIR * dirp )
      *
      */
     void
    -_tseekdir( _TDIR * dirp, long loc )
    +__mingw_impl__(_tseekdir)( _TDIR * dirp, long loc )
     {
       /* Another POSIX-XSI extension, with no specified mandatory
        * error conditions; we require a seek location of zero or
    @@ -454,7 +457,7 @@ _tseekdir( _TDIR * dirp, long loc )
       /* Other than this, we simply accept any error condition
        * which arises as we "rewind" the "directory stream"...
        */
    -  _trewinddir( dirp );
    +  __mingw_impl__(_trewinddir)( dirp );
    
       /* ...and, if this is successful...
        */
    

    However, that alone would not suffice; we would still need to add stubs to libmingwex.a, based on something like (again, untested):

    $ cat /media/usb/jmpstub.sx
    #define __entry__(__suffix__)  __label__(_,__suffix__)
    #define __label__(__prefix__,__suffix__)  __prefix__##__suffix__
    #define __mingw__(__suffix__)  __label__(___mingw_,__suffix__)
    
            .text
            .p2align 1
    
    .global __entry__(FUNCTION)
            .def    __entry__(FUNCTION); .scl 2; .type 32;
            .endef
    
    __entry__(FUNCTION):
            jmp     __mingw__(FUNCTION)
    
    $ gcc -c -D FUNCTION=opendir -o opendir.o /media/usb/jmpstub.sx
    
    $ nm opendir.o
    00000000 b .bss
    00000000 d .data
    00000000 t .text
             U ___mingw_opendir
    00000000 T _opendir
    
    $ objdump -D opendir.o
    
    opendir.o:     file format pe-i386
    
    Disassembly of section .text:
    
    00000000 <_opendir>:
       0:   e9 00 00 00 00          jmp    5 <_opendir+0x5>
       5:   90                      nop
       6:   90                      nop
       7:   90                      nop
    

    to ensure that we furnish public symbols, as equivalent entry points, for each of those functions we've now redirected inline.

     

    Last edit: Keith Marshall 2013-10-10
    • Eli Zaretskii

      Eli Zaretskii - 2013-10-10

      Since MinGW is not a Posix environment, applications do not need to stay away of identifiers that are reserved only by Posix.

      Even if MinGW was a Posix environment, it should have supported a compile-time way to turn off Posix, e.g., "gcc -ansi", which defines __STRICT_ANSI__. However, if the startup code unconditionally calls Posix functions, users won't be able to turn that off, there's no fire escape such as not including the header which declares the functions etc.

      So no, I don't agree that this is a bug in my project (which is GNU Emacs, btw). I think that a runtime system should always refrain from pushing non-standard identifiers into an application, except if the programmer explicitly asks for that, e.g., by calling a function or including a header that declares it.

      Thanks for the patch. I take it you no longer need me to propose one (I actually thought about something very similar)?

       
      • Keith Marshall

        Keith Marshall - 2013-10-10

        That's why I agreed that there may be some justification for progressing this.

        There's no point in getting into an argument about it, but IMO it is a bug in GNU Emacs that you did this. IIRC, when I last used AC_REPLACE_FUNCS in one of my own projects, the replacement functions were expected to have a "repl_" prefix, which kind of suggests that GNU convention recommends this. It's been a while though, and I could be mistaken.

        I do take on board, however, your point about the conflict of interest with __STRICT_ANSI__ for example; between us, Earnie and I will progress this, (hopefully in time for 4.1). If you can improve on my proposed patch, please do indicate how; otherwise, we will proceed from this base.

         
        • Eli Zaretskii

          Eli Zaretskii - 2013-10-11

          The patch looks OK to me, thanks. One question: will that _CRTALIAS thing allow to take the address of opendir, readdir, etc.? If not, that could be a problem in some programs. I think we had a thread on the MinGW mailing list about a similar issue (with another function, IIRC).

           
          • Keith Marshall

            Keith Marshall - 2013-10-11

            will that _CRTALIAS thing allow to take the address of opendir, readdir, etc.?

            No, it won't. That's why I noted the additional need to generate the (12 in this case) stubs, based on my jmpstub.sx offering.

            BTW, my patch isn't entirely okay, as it stands; I found three typos, when I applied it in my own sandbox. Earnie, would you like me to progress this, or do you want an updated patch?

             
  • Keith Marshall

    Keith Marshall - 2013-10-10

    BTW Earnie, whatever happened to "_CRTALIAS should become __CRT_ALIAS"?

     
    • Earnie Boyd

      Earnie Boyd - 2013-10-10

      Just haven't had the round tuit yet. I remember everytime I see it in a header, it will probably be near the end of 4.1-dev before I get to it. The same is true for [BEGIN|END]_C_DECLS.

       
    • Eli Zaretskii

      Eli Zaretskii - 2013-10-10

      I'm not Earnie, but... If you mean the double underscore, then that is not required: symbols that begin with an underscore and a capital letter are always reserved for the implementation.

      Apologies if you meant something else.

       
      • Keith Marshall

        Keith Marshall - 2013-10-10

        No problem. I did mean something else: a prior discussion where we had agreed that _CRTALIAS should become __CRT_ALIAS -- Earnie's suggestion BTW -- for stylistic consistency with similar names such as __CRT_INLINE. Of course, changing this one then begs the question: should the likes of _CRTIMP not also be similarly changed? Maybe a lot of effort, for negligible reward?

         

        Last edit: Keith Marshall 2013-10-11
  • Keith Marshall

    Keith Marshall - 2013-10-10
    • Description has changed:

    Diff:

    --- old
    +++ new
    @@ -1,3 +1,3 @@
     MinGW runtime 4.x calls the dirent functions, opendir/readdir/closedir, in its startup code.  This pollutes the namespace of, and breaks, application programs that have incompatible or entirely unrelated functions by that name, even if the application didn't include the dirent.h header.  For example, a program that defines its own readdir will almost surely break the globbing of command-line arguments, and could even crash.
    
    -Please make the startup code call __opendir, __readdir, etc. instead, as symbols that begin with two underscores are "reserved", and cannot legitimately happen in an application.
    +Please make the startup code call `__opendir`, `__readdir`, etc. instead, as symbols that begin with two underscores are "reserved", and cannot legitimately appear in an application.
    
     
  • Keith Marshall

    Keith Marshall - 2013-10-10
    • Patch attached: False --> True
     
  • Earnie Boyd

    Earnie Boyd - 2013-10-11
    • assigned_to: Earnie Boyd --> Keith Marshall
     
  • Earnie Boyd

    Earnie Boyd - 2013-10-11

    Please proceed Keith. 4.1-dev should build as of my last commit and push; I'll try to keep it doing that before pushing again.

     
    • Keith Marshall

      Keith Marshall - 2013-10-11

      It didn't for me, following a pull last night; see [#2100] for explanation.

       

      Related

      Issues: #2100

  • Keith Marshall

    Keith Marshall - 2013-10-14

    [#2106] should also be addressed, in conjunction with this issue.

     

    Related

    Issues: #2106

    • Earnie Boyd

      Earnie Boyd - 2013-10-14

      No, [#2106] will be addressed as a point release of 4.0 while this one will wait for 4.1.

       

      Related

      Issues: #2106

      • Keith Marshall

        Keith Marshall - 2013-10-18

        As you wish. I've already given you a patch for [#2106], and I also have a working implementation for this. Just one issue with it -- where should I put jmpstub.sx? Although I'm currently using it only for this specific case, (which would suggest src/libcrt/tchar), it's actually completely generic. We may well use it to implement physically addressable entry points for any number of other _CRTALIAS (or __CRT_ALIAS) functions, in which case a more general location would be better; src, misc/src, src/libcrt, (although it's potentially more general than exclusive to the scope of libcrt); somewhere else? Where would you prefer?

         

        Related

        Issues: #2106

        • Earnie Boyd

          Earnie Boyd - 2013-10-18

          I think src/libcrt/misc is a good fit for that.

           
          • Keith Marshall

            Keith Marshall - 2013-10-18

            I can certainly put it there. Its scope could be broader than just libcrt; do you consider it unlikely that we might use in a broader context?

             
            • Earnie Boyd

              Earnie Boyd - 2013-10-18

              Let me soak on it a bit. I'll get back to you within the next 12 hours. Maybe creating src/misc is a better fit. The misc/src directory contains code that we're not able to claim as our own.

               
              • Earnie Boyd

                Earnie Boyd - 2013-10-18

                I'm liking src/misc/ for this code. We can then use it wherever we like.

                 
1 2 > >> (Page 1 of 2)