Diff of /mcsource.c [e8a4ca] .. [3f615e]  Maximize  Restore

Switch to side-by-side view

--- a/mcsource.c
+++ b/mcsource.c
@@ -3,13 +3,13 @@
  *
  * $Id$
  *
- * Copyright (C) 2006, 2007, 2008, Keith Marshall
+ * Copyright (C) 2006, 2007, Keith Marshall
  *
  * This file implements the message catalogue source code parser, which is
  * used internally by `gencat', to compile message dictionaries.
  *
  * Written by Keith Marshall  <keithmarshall@users.sourceforge.net>
- * Last modification: 08-Jan-2008
+ * Last modification: 27-Mar-2007
  *
  *
  * This is free software.  It is provided AS IS, in the hope that it may
@@ -57,7 +57,6 @@
 #include <debug.h>
 
 #include <platform.h>
-#include <mcutfsig.h>
 
 #ifdef DEBUG_BUFSIZ
 # undef  BUFSIZ
@@ -96,6 +95,7 @@
   /* Identify a GENCAT directive, based on a specified keyword,
    * and activate the appropriate parser attribute bits to process it.
    */
+
   static struct directives
   {
     /* Defines the dictionary of known directives,
@@ -130,30 +130,8 @@
 }
 
 static
-char *mc_default_codeset( void )
+int errout( const char *src, long linenum, const char *fmt, ... )
 {
-  /* Helper function, called when the message definition file for a
-   * catalogue doesn't explicitly specify a codeset for the messages;
-   * establish the default codeset for the message catalogue, using
-   * the codeset of the LC_MESSAGES category in the present locale.
-   */
-  char *codeset;
-
-  if( (codeset = setlocale( LC_MESSAGES, "" )) == NULL )
-    codeset = setlocale( LC_MESSAGES, NULL );
-  setlocale( LC_CTYPE, codeset );
-  codeset = strdup( nl_langinfo( CODESET ));
-  setlocale( LC_CTYPE, "C" );
-
-  return codeset;
-}
-
-static
-int mc_errout( const char *src, long linenum, const char *fmt, ... )
-{
-  /* Message dispatcher for error messages,
-   * used when `gencat_errno' is to be set to indicate `EXIT_FAILURE'.
-   */
   va_list args;
   va_start( args, fmt );
   fprintf( stderr, "%s:%ld:", src, linenum );
@@ -162,8 +140,8 @@
   return EXIT_FAILURE;
 }
 
-static inline
-off_t mc_workspace_wanted( int fd )
+static
+off_t wanted( int fd )
 {
   struct stat info;
 # ifndef DEBUG
@@ -177,22 +155,19 @@
   return (off_t)(BUFSIZ);
 }
 
-static inline
-size_t mc_add_escape( iconv_t *iconv_map, char *msgbuf, wchar_t code )
+static
+size_t add_escape( iconv_t *iconv_map, char *msgbuf, wchar_t code )
 {
 /* A trivial helper function, for encoding an escape sequence into the
  * compiled message stream.
  */
-  dfprintf(( stderr, "add escape code: %0#4.4x", code ));
+  dfprintf(( stderr, DCODEFMT, code ));
   return iconv_wctomb( msgbuf, code );
 }
 
-static inline
-char *mc_update_workspace( char *buf, char *cache, unsigned int count )
+static
+char *update_workspace( char *buf, char *cache, unsigned int count )
 {
-  /* A helper function, to transfer encoded text from the input buffer
-   * to the workspace in which compiled messages are being collected.
-   */
 # ifdef DEBUG
   unsigned int xcount = count;
   char *start = buf;
@@ -206,40 +181,12 @@
   return buf;
 }
 
-static inline
-struct msgdict *mc_discard( struct msgdict *index, char *messages )
-{
-  /* A helper function, to reclaim all memory allocated to a local
-   * message dictionary, prior to aborting compilation of the current
-   * message catalogue source file.
-   */
-  while( index )
-  {
-    /* Walk the linked list of dictionary index entries, (if any),
-     * releasing the memory block alloted to each individually.
-     */
-    struct msgdict *next = index->link;
-    free( index );
-    index = next;
-  }
-  if( messages )
-    /*
-     * All of the indexed messages are collected into a single block,
-     * which is allocated, and so must be released, separately.
-     */
-    free( messages );
-  return index;
-}
-
 struct msgdict *mc_source( const char *input )
 {
-# define CODESET_DECLARED  codeset_decl_src, codeset_decl_lineno
-# define UTF_TYPE(ORDER)   8 * input_code_size, (ORDER)
-
-  dinvoke( int dtrace = 0; )
-
+# define CODESET_DECLARED       codeset_decl_src, codeset_decl_lineno
+
+  int fd, count;
   long accumulator;
-  int fd, input_fd, count;
   char buf[BUFSIZ], keyword[64];
   char *id;
 
@@ -258,175 +205,31 @@
   static char *codeset = NULL;
   static const char *codeset_decl_src = NULL;
   static unsigned int codeset_decl_lineno = 0;
-
-  unsigned short input_encoding = 0, input_code_size;
-
   static iconv_t iconv_map[2] = {(iconv_t)(-1), (iconv_t)(-1)};
   char *messages; off_t msgloc, headroom;
-  /*
-   * This `shift' state index is used to control interpretation
-   * of octal escape sequences in message text; for normal text
-   * processing, it should be set to zero.
-   */
-  unsigned shift = 0;
-  /*
-   * Other shift states supported, (they define the number of bits
-   * by which the accumulator must be shifted to the left, in order
-   * to multiply it by the associated number base), are:--
-   */
-#  define OCTAL_SEQUENCE_DECODE        3
-#  define HEXADECIMAL_SEQUENCE_DECODE  4
-
-  /* We use `last_char' to keep track track the character parsed
-   * in the most * recently preceding cycle.  (This is required so
-   * that we may explicitly handle CRLF line terminations, which are
-   * to be considered as a single character code; Microsoft's `O_TEXT'
-   * kludge cannot be used, because we may be running `gencat' as a
-   * cross hosted tool, on a platform which doesn't support this).
-   */
-  wchar_t last_char = L'\0';
-
-  /* Get a file descriptor for the input stream ...
-   */
+
   const char *dev_stdin = "/dev/stdin";
-  if( (strcmp( input, "-") == 0) || (strcasecmp( input, dev_stdin ) == 0) )
+  if( (strcmp( input, "-") == 0) || (strcmp( input, dev_stdin ) == 0) )
   {
-    /* ... reading from standard input ...
-     */
-    input_fd = fd = STDIN_FILENO;
+    fd = STDIN_FILENO;
     input = dev_stdin;
   }
-  /* ... or otherwise, from a named file ...
-   */
-  else if( (input_fd = fd = open( input, O_RDONLY | O_BINARY )) < 0 )
-    /*
-     * ... which we must be able to open, else we bail out.
-     */
+
+  else if( (fd = open( input, O_RDONLY | O_BINARY )) < 0 )
     return NULL;
+
   dfprintf(( stderr, "\n%s:new source file\n%s:", input, input ));
-
-  /* Allocate the workspace, in which we will collect the text of the
-   * messages to be compiled into the catalogue ...
-   */
-  if( (messages = mc_malloc( headroom = mc_workspace_wanted( fd ))) == NULL )
+  if( (messages = mc_malloc( headroom = wanted( fd ))) == NULL )
+    return NULL;
+
+  msgloc = (off_t)(0);
+  while( (count = read( fd, buf, sizeof( buf ) )) > 0 )
   {
-    /* ... but release our input file descriptor, and bail out,
-     * when we can't get sufficient memory.
-     */
-    close( input_fd );
-    return NULL;
-  }
-
-  /* Parse the input stream ...
-   */
-  msgloc = (off_t)(0);
-  while( (fd >= 0) && ((count = read( fd, buf, sizeof( buf ) )) > 0) )
-  {
-    /* ... for as long as there is text to be read ...
-     */
     char *p = buf;
     int high_water_mark = count - ( count >> 2 );
     dfprintf(( stderr, "\n%s:%u:read %u byte%s", input, linenum, count, count == 1 ? "" : "s" ));
-
-    if( input_encoding == 0 )
-    {
-      input_encoding = mc_utf_signature( buf );
-      switch( input_code_size = input_encoding & UTF_CODE_UNIT_SIZE_MASK )
-      {
-	case 1:
-	  if( (input_encoding & UTF_WITH_BYTE_ORDER_MARK) != 0 )
-	  {
-	    /*
-	     * This is UTF-8 input encoding, affirmed by the presence of
-	     * the byte order mark, (three bytes), which we must skip.
-	     */
-	    p += 3;
-	    count -= 3;
-	  }
-	  break;
-
-	case 2:
-	case 4:
-	  if( (input_encoding & UTF_WITH_BYTE_ORDER_MARK) != 0 )
-	  {
-	    /* This is either UTF-16, or UTF-32, also affirmed by the BOM,
-	     * which occupies the first code unit, so skip it.
-	     */
-	    p += input_code_size;
-	    count -= input_code_size;
-	  }
-	  break;
-
-	default:
-	  /*
-	   * This isn't valid, for any recognisable codeset in the required
-	   * POSIX Portable Character Set input context; diagnose, clean up,
-	   * and bail out.
-	   */
-	  dfputc(( '\n', stderr ));
-	  fprintf( errmsg( MSG_UTF_UNKNOWN ), input );
-	  fprintf( errmsg( MSG_UTF_SIZE_ERROR ), input, input_code_size );
-	  free( messages );
-	  close( input_fd );
-	  return NULL;
-      }
-
-      if( input_encoding > 1 )
-      {
-	/* We've detected a UTF input encoding, which implicitly specifies
-	 * the codeset of the messages defined within this source file.
-	 */
-	char utf_byte_order = UTF_BYTE_ORDER( input_encoding );
-	sprintf( keyword, "UTF-%d%cE", 8 * input_code_size, utf_byte_order );
-
-	dfprintf(( stderr, "\n%s:", input ));
-	dinvoke( if( (input_encoding & UTF_WITH_BYTE_ORDER_MARK) != 0 ) )
-	  dfprintf(( stderr, "unicode byte order mark detected; " ));
-	dfprintf(( stderr, "encoding identified as %s", keyword ));
-
-	if( codeset != NULL ) 
-	{
-	  /* We could coalesce these two conditions into a single test,
-	   * but we choose to nest them thus, to facilitate a possible
-	   * future change, to support codeset alternation.
-	   */
-	  if( strcmp( keyword, codeset ) != 0 )
-	  {
-	    /* The detected UTF input encoding is not compatible with the
-	     * previously declared codeset of the messages in the catalogue;
-	     * diagnose, and skip this source file.
-	     */
-	    dfputc(( '\n', stderr ));
-	    fprintf( errmsg( MSG_UTF_CODESET ), input, keyword );
-	    fprintf( errmsg( MSG_HAD_CODESET ), CODESET_DECLARED, codeset );
-	    free( messages );
-	    close( input_fd );
-	    return NULL;
-	  }
-	}
-
-	else
-	{
-	  /* We don't yet have a codeset declaration; establish one implicitly,
-	   * based on the identified input encoding.
-	   */
-	  id = strdup( keyword );
-	  if( (codeset = map_codeset( iconv_map, id, "wchar_t" )) == NULL )
-	  {
-	    free( id );
-	  }
-
-	  else
-	    codeset_decl_src = input;
-	}
-      }	
-    }
-
     while( count > 0 )
     {
-      /* ... scanning character by character,
-       * through the entire content of the input buffer.
-       */
       wchar_t c;
       int skip = 1;
       if( status & ENCODED )
@@ -435,12 +238,19 @@
          * of the current message catalogue locale, so ensure that
          * we have established an appropriate codeset mapping.
          */
+
         if( codeset == NULL )
         {
 	  /* No codeset mapping is yet in place,
 	   * so default to the codeset of the system locale.
 	   */
-          codeset = map_codeset( iconv_map, mc_default_codeset(), "wchar_t" );
+
+          if( (codeset = setlocale( LC_MESSAGES, "" )) == NULL )
+            codeset = setlocale( LC_MESSAGES, NULL );
+          setlocale( LC_CTYPE, codeset );
+          codeset = strdup( nl_langinfo( CODESET ));
+          setlocale( LC_CTYPE, "C" );
+          codeset = map_codeset( iconv_map, codeset, "wchar_t" );
 	  codeset_decl_lineno = linenum;
 	  codeset_decl_src = input;
         }
@@ -449,88 +259,29 @@
          * multibyte character codeset specified for the message catalogue,
          * transforming to the wide character domain, for local processing.
          */
+
         p += ((skip = iconv_mbtowc( &c, p, count )) > 0) ? skip : 0;
-
-	/* For UTF-16 or UTF-32 input encodings, the `skip' count *must*
-	 * match the codeset size, ...
-	 */
-	if( (input_code_size > 1) && (skip != input_code_size) )
-	{
-	  /* ... or we have a framing error; diagnose,
-	   * and discard this input stream.
-	   */
-	  dfputc(( '\n', stderr ));
-	  fprintf( errmsg( MSG_UTF_FRAME_ERROR ), input, linenum, codeset );
-	  return mc_discard( head, messages );
-	}
       }
 
       else
       {
         /* We are parsing context which is defined in the POSIX,
-         * or "C" locale, so read single byte character sequences,
-	 * but stripping out any padding NULs required to fill the
-	 * input stream to a UTF-16 or UTF-32 framing boundary.
+         * or "C" locale, so read single byte character sequences.
          */
-	int utf_skip = input_code_size - 1;
-	if( (utf_skip > 0) && ((input_encoding & UTF_BIG_ENDIAN) != 0) )
-	{
-	  /* Big-Endian Unicode should have padding NULs before the
-	   * POSIX `C' locale byte required.
-	   */
-	  while( (*p == '\0') && utf_skip-- && count-- )
-	    ++p;
-	  if( (utf_skip > 0) || (*p == '\0') )
-	  {
-	    /* Diagnose and bail out, if the number of padding NULs
-	     * wasn't what we expected.
-	     */
-	    dfputc(( '\n', stderr ));
-	    fprintf( errmsg( MSG_UTF_FRAME_ERROR ), input, linenum, UTF_TYPE( 'B' ));
-	    return mc_discard( head, messages );
-	  }
-	}
+
         c = (wchar_t)(*p++);
-	if( (utf_skip > 0) && ((input_encoding & UTF_LITTLE_ENDIAN) != 0) )
-	{
-	  /* Little-Endian Unicode should have the padding NULs after
-	   * this significant byte.
-	   */
-	  while( (*p == '\0') && utf_skip-- && count-- )
-	    ++p;
-	  if( (utf_skip > 0) || (*p == '\0') )
-	  {
-	    /* Diagnose and bail out, if the number of padding NULs
-	     * wasn't what we expected.
-	     */
-	    dfputc(( '\n', stderr ));
-	    fprintf( errmsg( MSG_UTF_FRAME_ERROR ), input, linenum, UTF_TYPE( 'L' ));
-	    return mc_discard( head, messages );
-	  }
-	}
       }
 
       if( skip > 0 )
       {
         count -= skip;
-        if( c == '\r' )
-	  /*
-	   * The current input character is a carriage return.
-	   * This may simply be the lead byte of a CRLF line terminator
-	   * in a CRLF format input file, but we will not know this until
-	   * we examine the following input character; request a FLUSH,
-	   * so we keep the workspace consistent, and defer processing
-	   * this CR until the next cycle, (by which time, it will
-	   * have been moved into `last_char').
-	   */
-	  status |= FLUSH;
-	
-	else if( status & NEWLINE )
+        if( status & NEWLINE )
         {
           /* We just started parsing a new input line ...
            * Increment the line number, reset the parser context,
            * and clear the set/message number accumulator.
            */
+
           ++linenum;
           status &= ~( DIRECTIVE | NUMERIC | CATEGORY );
           accumulator = 0;
@@ -538,8 +289,10 @@
           if( (status & (NEWLINE | CONTINUED)) == NEWLINE )
           {
             /* When this new line is NOT simply a logical continuation
-             * of the previous line ...
+             * of the previous line...
              */
+
+            status &= ~MSGTEXT;
             dfprintf(( stderr, "\n\n%s:%d:new input record", input, linenum ));
             if( c == '$' )
             {
@@ -547,6 +300,7 @@
                * means that this line is either a `gencat' directive,
                * or it's a comment.
                */
+
               status |= DIRECTIVE;
               id = keyword;
             }
@@ -556,6 +310,7 @@
               /* This is a message definition line,
                * with a the message identified by an explicit numeric key.
                */
+
               status |= NUMERIC;
               accumulator = c - L'0';
             }
@@ -568,6 +323,7 @@
              * then we need to include the current input character
              * as part of the message definition.
              */
+
             if( c == quote )
             {
               dfprintf(( stderr, "\n%s:%u:%s quoted context", input, linenum, (status & QUOTED) ? "end" : "begin" ));
@@ -577,7 +333,6 @@
             else
             {
               xcount += skip;
-	      dinvoke(( dtrace = dtrace ? dtrace : fprintf( stderr, "\n%s:%u:scan input: ", input, linenum ) ));
               dfputc(( c, stderr ));
             }
           }
@@ -585,6 +340,7 @@
           /* Now, we dealt with the new line conditions,
            * so clear the related NEWLINE and CONTINUATION flags.
            */
+
           status &= ~( NEWLINE | CONTINUED );
         }
 
@@ -594,18 +350,21 @@
            * which persists until a space character marks the end of the
            * directive identifying keyword.
            */
+
           if( isspace( c ) )
           {
             /* We found the keyword delimiting space ...
             */
+
             if( id == keyword )
             {
-              /* But, we didn't find any keyword ...
+              /* But, we didn't find any keyword...
                *
                * This is a comment line, but it may be the special case of
 	       * a codeset declaration comment, so we can't simply ignore it;
 	       * set the comment state, to parse any codeset assignment.
                */
+
 	      status = (status & ~CATEGORY) | DEFCONV;
               dfprintf(( stderr, "\n%s:%u:record type: comment", input, linenum ));
             }
@@ -635,13 +394,12 @@
 		{
 		  if( strcmp( codeset, id ) != 0 )
 		  {
-		    dfputc(( '\n', stderr ));
 		    fprintf( errmsg( MSG_CODESET_CLASH ), input, linenum, id );
 		    fprintf( errmsg( MSG_HAD_CODESET ), CODESET_DECLARED, codeset );
 		  }
 		  free( id );
 		}
-		dfprintf(( stderr, "\n%s:%u:declare %s", input, linenum, keyword ));
+		dfprintf(( stderr, "; declare %s", keyword ));
 	      }
 	    }
 
@@ -651,6 +409,7 @@
 	       * We have identified a possible match for a directive keyword;
                * identify it, and establish its associated parser state.
                */
+
               *id = '\0';
               status = mc_directive( status, keyword );
               dfprintf(( stderr, "\n%s:%u:record type: directive: %s", input, linenum, keyword ));
@@ -663,6 +422,7 @@
             /* We are still parsing a potential directive keyword;
              * add the current character to the keyword parse buffer.
              */
+
             if( (id - keyword) < (sizeof( keyword ) - 1) )
               *id++ = c;
           }
@@ -670,14 +430,16 @@
 
         else if( status & NUMERIC )
         {
-          /* We are parsing a numeric value ...
-           */
+          /* We are parsing a numeric value...
+          */
+
           if( isdigit( c ) )
           {
-            /* ... and the current character is part of the number,
+            /* ...and the current character is part of the number,
              * so add it into the accumulator.
              */
-	    accumulator = accumulator * 10 + c - L'0';
+
+            accumulator = accumulator * 10 + c - L'0';
           }
 
           else if( isspace( c ) )
@@ -686,13 +448,16 @@
              * so hand it off as a set number, or a message number,
              * and process as appropriate.
              */
+
             switch( status & CATEGORY )
             {
               case ADDSET:
                 /*
                  * Invoked by a "set" directive,
                  * open a new numbered message set within the catalogue ...
+                 *
                  */
+
                 dfprintf(( stderr, ": add set with id = %ld", accumulator ));
                 if( accumulator > setnum )
                 {
@@ -704,6 +469,7 @@
                    * so we can simply create a new message set with this "setnum",
                    * and reset the "msgnum", for the start of a new set.
                    */
+
                   setnum = accumulator;
                   msgnum = 0;
                 }
@@ -713,9 +479,9 @@
                   /* This "setnum" entry DOESN'T satisfy the ascending order rule,
                    * so complain, and bail out.
                    */
+
                   dfputc(( '\n', stderr ));
-                  gencat_errno = mc_errout( FATAL( MSG_SETNUM_NOT_INCR ), setnum, accumulator );
-		  close( input_fd );
+                  gencat_errno = errout( FATAL( MSG_SETNUM_NOT_INCR ), setnum, accumulator );
 		  return NULL;
                 }
                 break;
@@ -724,6 +490,7 @@
                 /*
                  * Invoked by a "delset" directive,
                  * mark a numbered message set for deletion from the catalogue.
+		 *
                  */
                 dfprintf(( stderr, ": delete set with id = %ld", accumulator ));
 		if( (accumulator > 0) && (accumulator <= NL_SETMAX) )
@@ -734,6 +501,7 @@
                     /* We successfully created an empty dictionary slot,
 		     * so fill it in as a `delset' request entry.
 		     */
+
 		    this->src = input;
 		    this->lineno = linenum;
 		    this->base = NULL;
@@ -744,6 +512,7 @@
                       /* The catalogue currently contains no records,
                        * so simply insert this as the first one.
                        */
+
                       head = tail = this;
                       this->link = NULL;
                     }
@@ -753,6 +522,7 @@
                       /* We've already added some message records,
                        * so the new one must be added at the end.
                        */
+
                       this->link = tail->link;
                       tail->link = this;
                       tail = this;
@@ -769,6 +539,7 @@
                    * message defined in the current set; this declaration satisfies
                    * this requirement, so add a new message to the catalogue.
                    */
+
                   struct msgdict *this;
                   if( (this = mc_malloc( sizeof( struct msgdict ))) != NULL )
                   {
@@ -778,21 +549,16 @@
                      * first check that one has been opened; if not, we
                      * simply open the default set.
                      */
+
                     if( setnum == 0 )
                       setnum = NL_SETD;
 
                     /* We may now complete the message details in the new
                      * dictionary slot, and commit the record to the catalogue.
-		     * Note that, if the message number tag in the source file
-		     * is on an otherwise empty line, and is *immediately*
-		     * followed by a newline, with no intervening space,
-		     * then this message should be deleted; we flag this
-		     * by setting `this->base = NULL'.  In all other cases,
-		     * the message is to be placed into the catalogue, so
-		     * we set 'this->base = messages'.
                      */
+
 		    this->src = input;
-		    this->base = (c == L'\n') ? NULL : messages;
+		    this->base = messages;
 		    this->lineno = linenum;
                     this->set = setnum;
                     this->msg = msgnum = accumulator;
@@ -802,6 +568,7 @@
                       /* The catalogue currently contains no records,
                        * so simply insert this as the first one.
                        */
+
                       head = tail = this;
                       this->link = NULL;
                     }
@@ -811,6 +578,7 @@
                       /* We've already added some message records,
                        * so the new one must be added at the end.
                        */
+
                       this->link = tail->link;
                       tail->link = this;
                       tail = this;
@@ -820,14 +588,12 @@
 
                 else
                 {
-                  /* This doesn't satisfy the POSIX requirement that,
-                   * within each set, messages must appear in strictly
-                   * incrementing "msgnum" order, so complain, and
-                   * bail out.
+                  /* This doesn't satisfy the requirement for incrementing "msgnum",
+                   * so complain, and bail out.
                    */
+
                   dfputc(( '\n', stderr ));
-                  gencat_errno = mc_errout( FATAL( MSG_MSGNUM_NOT_INCR ), msgnum, accumulator );
-		  close( input_fd );
+                  gencat_errno = errout( FATAL( MSG_MSGNUM_NOT_INCR ), msgnum, accumulator );
 		  return NULL;
                 }
                 status |= ( MSGTEXT | ENCODED );
@@ -849,6 +615,7 @@
            * defining the "quote" character to be used, or "none" if no other
            * character appears before end of line.
            */
+
           quote = (c == L'\n') ? L'\0' : c;
           dfprintf(( stderr, quote ? ": assigned as %#4.4x" : ": none assigned", quote ));
           status &= ~( CATEGORY | ENCODED );
@@ -857,248 +624,112 @@
         else if( status & MSGTEXT )
         {
           /* We are compiling a message ...
-	   */
-	  if( shift )
+           * Continue scanning the current input line,
+           * until we find the end-of-line marker.
+           */
+
+          if( c != L'\n' )
+          {
+            /* We haven't reached end-of-line yet...
+             * Check for other characters with special significance.
+             */
+
+            if( status & ESCAPE )
+            {
+              /* The current input character was escaped...
+               * Clear the ESCAPE flag, and interpret this case.
+               */
+
+	      size_t len = 0;
+              status &= ~ESCAPE;
+              switch ( c )
+              {
+                case L'r':      /* embed a carriage return */
+		  len = add_escape( iconv_map, messages + msgloc, L'\r' );
+                  break;
+
+                case L'n':      /* embed a newline */
+		  len = add_escape( iconv_map, messages + msgloc, L'\n' );
+                  break;
+
+                default:        /* not a special case; just pass it through */
+                  xcount += skip;
+                  dfputc(( c, stderr ));
+              }
+	      if( len > (size_t)(0) )
+	      {
+		headroom -= len;
+		msgloc += len;
+	      }
+            }
+
+            else if( c == L'\\' )
+            {
+              /* This is the escape character...
+               * Set the parser flags, so that any cached message data is flushed,
+               * and switch to ESCAPE mode, to interpret the next character.
+               */
+
+              status |= FLUSH | ESCAPE;
+            }
+
+            else if( c == quote )
+            {
+              dfprintf(( stderr, "\n%s:%u:%s quoted context", input, linenum, (status & QUOTED) ? "end" : "begin" ));
+              status = (status ^ QUOTED) | FLUSH;
+            }
+
+            else
+            {
+              xcount += skip;
+              dfputc(( c, stderr ));
+            }
+          }
+	  if( count < ICONV_MB_LEN_MAX )
 	  {
-	    /* The current input character is either part of an
-	     * escaped octal digit sequence, or it terminates one.
-	     */
-	    switch( c )
-	    {
-	      case L'0' ... L'7':
-		/*
-		 * This is a continuation of the sequence ...
-		 */
-		accumulator = (accumulator << shift) + c - L'0';
-		break;
-
-	      default:
-		/*
-		 * This is the character immediately following
-		 * an encoded octal digit sequence ...
-		 */
-		if( accumulator > 0 )
-		{
-		  /* if it is a valid, non-NUL character code,
-		   * add it into the workspace ...
-		   */
-		  size_t len;
-		  dfprintf(( stderr, "\n%s:%u:", input, linenum ));
-		  len = mc_add_escape( iconv_map, messages + msgloc, accumulator );
-		  if( len > (size_t)(0) )
-		  {
-		    /* ... adjusting `headroom' and `msgloc' accordingly.
-		     */
-	    	    headroom -= len;
-    		    msgloc += len;
-		  }
-		}
-		/* Cancel the shift state which brought us to here;
-		 * its purpose has been satisfied.
-		 */
-		shift = 0;
-	    }
+	    skip = 0;
+	    status |= FLUSH;
 	  }
-	  /* Do not use `else' here; the `shift' state may have changed
-	   * since the preceding check, in which case, we may also need
-	   * to do this ...
-	   */
-	  if( shift == 0 )
-	  {
-	    /* Continue scanning the current input line,
-	     * until we find the end-of-line marker.
-	     */
-	    if( c != L'\n' )
-	    {
-	      /* We haven't reached end-of-line yet ...
-	       * Check for other characters with special significance.
-	       */
-	      if( last_char == L'\r' )
-	      {
-		size_t len;
-
-		/* The previous character was a deferred carriage return,
-		 * but it was *not* the lead byte in a CRLF line terminator,
-		 * so we need to emit it into the message definition.
-		 */
-		dfprintf(( stderr, "\n%s:%u:", input, linenum ));
-		len = mc_add_escape( iconv_map, messages + msgloc, L'\r' );
-		if( len > (size_t)(0) )
-		{
-		  headroom -= len;
-		  msgloc += len;
-		}
-	      }
-
-	      if( status & ESCAPE )
-	      {
-		/* The current input character was escaped ...
-		 * Clear the ESCAPE flag, and interpret this case.
-		 */
-		size_t len = 0;
-		status &= ~ESCAPE;
-		dfprintf(( stderr, "\n%s:%u:", input, linenum ));
-		switch( c )
-		{
-		  /* Thus, for the standard escape sequences ...
-		   */
-		  case L'b':
-		    /*
-		     * The "\b" escape sequence is to be interpreted as
-		     * a literal backspace; encode it ...
-		     */
-		    len = mc_add_escape( iconv_map, messages + msgloc, L'\b' );
-		    break;
-
-		  case L'r':
-		    /*
-		     * Similarly for "\r", which is to be encoded as
-		     * a carriage return ...
-		     */
-		    len = mc_add_escape( iconv_map, messages + msgloc, L'\r' );
-		    break;
-
-		  case L'n':
-		    /*
-		     * And for "\n", representing a newline ...
-		     */
-		    len = mc_add_escape( iconv_map, messages + msgloc, L'\n' );
-		    break;
-
-		  case L't':
-		    /*
-		     * ... "\t", representing a horizontal tab ...
-		     */
-		    len = mc_add_escape( iconv_map, messages + msgloc, L'\t' );
-		    break;
-
-		  case L'v':
-		    /*
-		     * ... "\v", representing a vertical tab ...
-		     */
-		    len = mc_add_escape( iconv_map, messages + msgloc, L'\v' );
-		    break;
-
-		  case L'f':
-		    /*
-		     * ... and "\f", representing a form feed.
-		     */
-		    len = mc_add_escape( iconv_map, messages + msgloc, L'\f' );
-		    break;
-
-		  case L'0' ... L'7':
-		    /*
-		     * This is the first in a "\ddd" octal digit sequence;
-		     * initialise the accumulator, and activate the appropriate
-		     * shift state, to capture the remaining digits.
-		     */
-		    accumulator = c - L'0';
-		    shift = OCTAL_SEQUENCE_DECODE;
-		    break;
-
-		  default:
-		    /*
-		     * Anything else is not a special case; we can simply pass it
-		     * through as a regular character.  Notice that we don't need
-		     * to treat "\\" as special; this default action produces the
-		     * desired effect.
-		     */
-		    xcount += skip;
-		    dfprintf(( stderr, "pass through escape code: %0#4.4x", c ));
-		}
-		if( len > (size_t)(0) )
-		{
-		  /* Adjust the `headroom' counter, and the current `msgloc' offset,
-		   * to account for the escape code we just added to the message buffer.
-		   */
-		  headroom -= len;
-		  msgloc += len;
-		}
-	      }
-
-	      else if( c == L'\\' )
-	      {
-		/* This is the escape character ...
-		 * Set the parser flags, so that cached message data is flushed,
-		 * and switch to ESCAPE mode, to interpret the next character.
-		 */
-		status |= FLUSH | ESCAPE;
-	      }
-
-	      else if( c == quote )
-	      {
-		/* This is the designated `quote' character ...
-		 * Toggle the state of the quoted context indicator flag.
-		 */
-		dfprintf(( stderr, "\n%s:%u:%s quoted context", input, linenum, (status & QUOTED) ? "end" : "begin" ));
-		status = (status ^ QUOTED) | FLUSH;
-	      }
-
-	      else
-	      {
-		/* This is just a regular character ...
-		 * Schedule it for copying it to the message buffer,
-		 * when the next FLUSH is invoked.
-		 */
-		xcount += skip;
-		dinvoke(( dtrace = dtrace ? dtrace : fprintf( stderr, "\n%s:%u:scan input: ", input, linenum ) ));
-		dfputc(( c, stderr ));
-	      }
-	    }
-	    if( count < ICONV_MB_LEN_MAX )
-	    {
-	      /* There may not be sufficient bytes in the input queue,
-	       * to satisfy a fetch request for a potential multibyte sequence,
-	       * so request a FLUSH now, so that the buufer may be replenished.
-	       *
-	       * Note that we must reset `skip', to avoid double accounting
-	       * for content already scheduled for, but not yet copied to the
-	       * message compilation buffer.
-	       */
-	      skip = 0;
-	      status |= FLUSH;
-	    }
-	  }
-	}
-
-	if( c == L'\n' )
-	{
-	  /* Mark the end of the current input line,
-	   * and schedule flushing of any pending message data from this line
-	   * to the message collection buffer.
-	   */
-	  status |= NEWLINE | FLUSH;
-
-	  /* If "QUOTED" context remains active, at the end of this line,
-	   * then we have an implicit continuation, so force it.
-	   */
-	  if( (status & QUOTED) == QUOTED )
-	    status |= CONTINUED;
-
-	  /* Clean up the context of any pending directive processing.
-	   */
-	  switch( status & CATEGORY )
-	  {
-	    case DEFQUOTE:
-	      /*
-	       * If we see end of line with a DEFQUOTE pending, then
-	       * there was no defining character with the "quote" directive,
-	       * so we must disable "quote" character recognition.
-	       */
-	      quote = L'\0';
-	      dfprintf(( stderr, ": none assigned" ));
-	      break;
-	  }
-
-	  if( (status & CONTINUED) == 0 )
-	  {
-	    /* the following input line is not marked as a continuation,
-	     * so its initial character *must* be interpreted as a member
-	     * of the POSIX Portable Character Set.
-	     */
-	    status &= ~ENCODED;
-	  }
-	}
+        }
+
+        if( c == L'\n' )
+        {
+          /* Mark the end of the current input line,
+           * and schedule any pending message data from this line
+           * for flushing to the message collection buffer.
+           */
+
+          status |= NEWLINE | FLUSH;
+
+          /* If "QUOTED" context remains active, at the end of this line,
+           * then we have an implicit continuation, so force it.
+           */
+
+          if( (status & QUOTED) == QUOTED )
+            status |= CONTINUED;
+
+          /* Clean up the context of any pending directive processing.
+           */
+
+          switch( status & CATEGORY )
+          {
+            case DEFQUOTE:
+              /*
+               * If we see end of line with a DEFQUOTE pending,
+               * then there was no defining character with the "quote" directive,
+               * so we must disable "quote" character recognition.
+               */
+
+              quote = L'\0';
+              dfprintf(( stderr, ": none assigned" ));
+              break;
+          }
+
+          if( (status & CONTINUED) == 0 )
+          {
+            status &= ~ENCODED;
+          }
+        }
       }
 
       if( status & FLUSH )
@@ -1107,61 +738,27 @@
          * which now needs to be flushed to the output queue,
          * BEFORE we proceed to the next cycle.
          */
-	dinvoke(( dtrace = 0 ));
+
         while( headroom < (xcount + ICONV_MB_LEN_MAX) )
         {
-	  /* Ensure that the workspace includes sufficient free space
-	   * to accommodate all content to be transferred, plus at least
-	   * one additional maximum length multibyte character sequence.
-	   * if not, expand it in `BUFSIZ' increments, until it does ...
-	   */
           headroom += BUFSIZ;
-          dfprintf(( stderr, "\n%s:%u:insufficient workspace remaining; grow allocation to %u bytes", input, linenum, (unsigned)(msgloc + headroom) ));
+          dfprintf(( stderr, "<grow allocation to %u bytes>", (unsigned)(msgloc + headroom) ));
           if( (messages = realloc( messages, msgloc + headroom )) == NULL )
 	  {
-	    /* ... bailing out, if the required expansion fails.
-	     */
-            gencat_errno = mc_errout( FATAL( MSG_OUT_OF_MEMORY ));
-	    close( input_fd );
+            gencat_errno = errout( FATAL( MSG_OUT_OF_MEMORY ));
 	    return NULL;
 	  }
         }
-	/* Adjust the `headroom' counter, and the `msgloc' offset,
-	 * to account for the content, as it is transferred.
-	 */
         headroom -= xcount;
 	dfprintf(( stderr, "\n%s:%u:", input, linenum ));
-        msgloc = mc_update_workspace( messages + msgloc, p - xcount - skip, xcount )
-               - messages;
-	dfprintf(( stderr, "; %u byte%s free", headroom, headroom == 1 ? "" : "s" ));
-
+        msgloc = update_workspace( messages + msgloc, p - xcount - skip, xcount ) - messages;
+	dfprintf(( stderr, "; %u byte%s free\n", headroom, headroom == 1 ? "" : "s" ));
         if( (status & (MSGTEXT | NEWLINE | CONTINUED)) == (MSGTEXT | NEWLINE) )
         {
-	  /* We've found the end of a message definition record in our input,
-	   * and it is not marked for continuation on the following input line;
-	   * we must terminate the associated entry in our message buffer.
-	   *
-	   * Note that we *must* create a local variable to pass the terminator
-	   * code; the `iconv_wctomb' marcro needs to pass the *address* for
-	   * this to the `iconv_wrap' function.
-	   */
           wchar_t terminator = L'\0';
-	  if( codeset == NULL )
-	  {
-	    /* No codeset mapping is yet in place,
-	     * so default to the codeset of the system locale.
-	     */
-	    codeset = map_codeset( iconv_map, mc_default_codeset(), "wchar_t" );
-	    codeset_decl_lineno = linenum;
-	    codeset_decl_src = input;
-	  }
-	  /* Encode the terminator, and add it into the workspace ...
-	   */
-          xcount = iconv_wctomb( messages + msgloc, terminator );
+          int xcount = iconv_wctomb( messages + msgloc, terminator );
           if( xcount >= 0 )
           {
-	    /* ... adjusting `headroom' counter and `msgloc' offset accordingly.
-	     */
             dfprintf(( stderr, "\n%s:%u:end of message; terminator added: %d byte(s)", input, linenum, xcount ));
             msgloc += xcount;
             headroom -= xcount;
@@ -1171,7 +768,6 @@
             dfprintf(( stderr, "\n%s:%u:end of message: add terminator failed", input, linenum ));
           }
           tail->len = msgloc - tail->loc;
-	  status &= ~MSGTEXT;
         }
         status &= ~FLUSH;
         xcount = 0;
@@ -1181,33 +777,26 @@
 	 */
 	if( (p - buf) > high_water_mark )
 	{
-	  int ref;
 	  char *copyptr;
 	  for( copyptr = buf; count; count-- )
 	    *copyptr++ = *p++;
-	  p = buf; ref = count = copyptr - p;
+	  p = buf; count = copyptr - p;
 	  dfprintf(( stderr, "\n%s:%u:input count depleted: %u byte%s remaining", input, linenum, count, count == 1 ? "" : "s" ));
-	  if( (fd >= 0)
-	  &&  (ref == (count += read( fd, copyptr, sizeof( buf ) - count )))  )
-	    fd = -1;
+	  count += read( fd, copyptr, sizeof( buf ) - count );
 	  dfprintf(( stderr, "; read new input: count adjusted to %u byte%s", count, count == 1 ? "" : "s" ));
 	  high_water_mark = count - ( count >> 2 );
 	}
       }
-      /* Make a note of the character code we have just parsed,
-       * for possible deferred processing in the next cycle.
-       */
-      last_char = c;
     }
     dfprintf(( stderr, "\n%s:end of input; (count is now %d bytes)", input, count ));
   }
   /*
-   * At the end of the current input file ...
+   * At the end of the current input file...
    * Check that the parser finished in an appropriate termination state.
    */
   if( status & QUOTED )
   {
-    /* Abnormal termination ...
+    /* Abnormal termination...
      * EOF was encountered within a quoted literal, before the closing
      * quote was found; diagnose abnormal termination state.
      */
@@ -1216,7 +805,7 @@
 
   if( (status & NEWLINE) != NEWLINE )
   {
-    /* Abnormal termination ...
+    /* Abnormal termination...
      * The input file lacks a terminating newline; diagnose abnormal
      * termination state.
      */
@@ -1225,7 +814,7 @@
 
   if( status & MSGTEXT )
   {
-    /* Abnormal termination ...
+    /* Abnormal termination...
      * EOF was encountered while parsing a continued message definition;
      * diagnose abnormal termination state, and mark incomplete message
      * for deletion.
@@ -1242,12 +831,6 @@
   dfprintf(( stderr, "\n\nAllocation adjusted to %u bytes\n", (unsigned)(msgloc) ));
   for( tail = head; tail != NULL; tail = tail->link )
   {
-    /* Just do this for all entries in the list!
-     * Don't assume we can optimise by quitting if we find a reference
-     * which is already mapped to the correct address; the list could
-     * have moved, and subsequently have moved back to the old address,
-     * in which case a later entry could be invalid.
-     */
     if( tail->base != NULL )
       /*
        * Update index entries *except* those with a NULL base pointer;
@@ -1258,11 +841,7 @@
   }
   dfputc(( L'\n', stderr ));
 
-  /* We are done with the current input source;
-   * close its file descriptor, and return the message list.
-   */
-  close( input_fd );
   return head;
 }
 
-/* $RCSfile$Revision: 1.12 $: end of file */
+/* $RCSfile$Revision$: end of file */

Get latest updates about Open Source Projects, Conferences and News.

Sign up for the SourceForge newsletter:





No, thanks