Diff of /Doc/lib/libshlex.tex [16f47b] .. [4d2259] Maximize Restore

  Switch to side-by-side view

--- a/Doc/lib/libshlex.tex
+++ b/Doc/lib/libshlex.tex
@@ -4,26 +4,16 @@
 \declaremodule{standard}{shlex}
 \modulesynopsis{Simple lexical analysis for \UNIX\ shell-like languages.}
 \moduleauthor{Eric S. Raymond}{esr@snark.thyrsus.com}
+\moduleauthor{Gustavo Niemeyer}{niemeyer@conectiva.com}
 \sectionauthor{Eric S. Raymond}{esr@snark.thyrsus.com}
+\sectionauthor{Gustavo Niemeyer}{niemeyer@conectiva.com}
 
 \versionadded{1.5.2}
 
 The \class{shlex} class makes it easy to write lexical analyzers for
 simple syntaxes resembling that of the \UNIX{} shell.  This will often
-be useful for writing minilanguages, e.g.\ in run control files for
-Python applications.
-
-\begin{classdesc}{shlex}{\optional{stream\optional{, file}}}
-A \class{shlex} instance or subclass instance is a lexical analyzer
-object.  The initialization argument, if present, specifies where to
-read characters from. It must be a file- or stream-like object with
-\method{read()} and \method{readline()} methods.  If no argument is given,
-input will be taken from \code{sys.stdin}.  The second optional 
-argument is a filename string, which sets the initial value of the
-\member{infile} member.  If the stream argument is omitted or
-equal to \code{sys.stdin}, this second argument defaults to ``stdin''.
-\end{classdesc}
-
+be useful for writing minilanguages (e.g.\ in run control files for
+Python applications) or for parsing quoted strings.
 
 \begin{seealso}
   \seemodule{ConfigParser}{Parser for configuration files similar to the
@@ -31,16 +21,50 @@
 \end{seealso}
 
 
+\subsection{Module Contents}
+
+The \module{shlex} module defines the following functions:
+
+\begin{funcdesc}{split}{s\optional{, posix=\code{True}\optional{,
+			spaces=\code{True}}}}
+Split the string \var{s} using shell-like syntax. If \var{posix} is
+\code{True}, operate in posix mode. If \var{spaces} is \code{True}, it
+will only split words on whitespace (setting the
+\member{whitespace_split} member of the \class{shlex} instance).
+\versionadded{2.3}
+\end{funcdesc}
+
+The \module{shlex} module defines the following classes:
+
+\begin{classdesc}{shlex}{\optional{instream=\code{sys.stdin}\optional{,
+			 infile=\code{None}\optional{,
+			 posix=\code{False}}}}}
+A \class{shlex} instance or subclass instance is a lexical analyzer
+object.  The initialization argument, if present, specifies where to
+read characters from. It must be a file-/stream-like object with
+\method{read()} and \method{readline()} methods, or a string (strings
+are accepted since Python 2.3). If no argument is given, input will be
+taken from \code{sys.stdin}.  The second optional argument is a filename
+string, which sets the initial value of the \member{infile} member.  If
+the \var{instream} argument is omitted or equal to \code{sys.stdin},
+this second argument defaults to ``stdin''.  The \var{posix} argument
+was introduced in Python 2.3, and defines the operational mode. When
+\var{posix} is not true (default), the \class{shlex} instance will
+operate in compatibility mode. When operating in posix mode,
+\class{shlex} will try to be as close as possible to the posix shell
+parsing rules. See section~\ref{shlex-parsing-rules}.
+\end{classdesc}
+
 \subsection{shlex Objects \label{shlex-objects}}
 
 A \class{shlex} instance has the following methods:
-
 
 \begin{methoddesc}{get_token}{}
 Return a token.  If tokens have been stacked using
 \method{push_token()}, pop a token off the stack.  Otherwise, read one
 from the input stream.  If reading encounters an immediate
-end-of-file, an empty string is returned. 
+end-of-file, \member{self.eof} is returned (the empty string (\code{""})
+in non-posix mode, and \code{None} in posix mode).
 \end{methoddesc}
 
 \begin{methoddesc}{push_token}{str}
@@ -132,11 +156,31 @@
 carriage-return.
 \end{memberdesc}
 
+\begin{memberdesc}{escape}
+Characters that will be considered escape characters. This is only used
+in posix mode, and includes just \character{\textbackslash} by default.
+\versionadded{2.3}
+\end{memberdesc}
+
 \begin{memberdesc}{quotes}
 Characters that will be considered string quotes.  The token
 accumulates until the same quote is encountered again (thus, different
 quote types protect each other as in the shell.)  By default, includes
 \ASCII{} single and double quotes.
+\end{memberdesc}
+
+\begin{memberdesc}{escapedquotes}
+Characters in \member{quotes} that will interpret escape characters
+defined in \member{escape}. This is only used in posix mode, and includes
+just \character{"} by default.
+\versionadded{2.3}
+\end{memberdesc}
+
+\begin{memberdesc}{whitespace_split}
+If true, tokens will only be split on whitespace. This is useful, for
+example, for parsing command lines with \class{shlex}, getting tokens
+in a similar way to shell arguments.
+\versionadded{2.3}
 \end{memberdesc}
 
 \begin{memberdesc}{infile}
@@ -168,13 +212,6 @@
 details.
 \end{memberdesc}
 
-Note that any character not declared to be a word character,
-whitespace, or a quote will be returned as a single-character token.
-
-Quote and comment characters are not recognized within words.  Thus,
-the bare words \samp{ain't} and \samp{ain\#t} would be returned as single
-tokens by the default parser.
-
 \begin{memberdesc}{lineno}
 Source line number (count of newlines seen so far plus one).
 \end{memberdesc}
@@ -183,3 +220,56 @@
 The token buffer.  It may be useful to examine this when catching
 exceptions.
 \end{memberdesc}
+
+\begin{memberdesc}{eof}
+Token used to determine end of file. This will be set to the empty
+string (\code{""}) in non-posix mode, and to \code{None} in posix
+mode.
+\versionadded{2.3}
+\end{memberdesc}
+
+\subsection{Parsing Rules\label{shlex-parsing-rules}}
+
+When operating in non-posix mode, \class{shlex} will try to obey the
+following rules.
+
+\begin{itemize}
+\item Quote characters are not recognized within words
+      (\code{Do"Not"Separate} is parsed as the single word
+      \code{Do"Not"Separate});
+\item Escape characters are not recognized;
+\item Enclosing characters in quotes preserve the literal value of
+      all characters within the quotes;
+\item Closing quotes separate words (\code{"Do"Separate} is parsed
+      as \code{"Do"} and \code{Separate});
+\item If \member{whitespace_split} is \code{False}, any character not
+      declared to be a word character, whitespace, or a quote will be
+      returned as a single-character token. If it is \code{True},
+      \class{shlex} will only split words on whitespace;
+\item EOF is signaled with an empty string (\code{""});
+\item It's not possible to parse empty strings, even if quoted.
+\end{itemize}
+
+When operating in posix mode, \class{shlex} will try to obey the
+following parsing rules.
+
+\begin{itemize}
+\item Quotes are stripped out, and do not separate words
+      (\code{"Do"Not"Separate"} is parsed as the single word
+      \code{DoNotSeparate});
+\item Non-quoted escape characters (e.g.\ \character{\textbackslash})
+      preserve the literal value of the next character that follows;
+\item Enclosing characters in quotes which are not part of
+      \member{escapedquotes} (e.g.\ \character{'}) preserve the literal
+      value of all characters within the quotes;
+\item Enclosing characters in quotes which are part of
+      \member{escapedquotes} (e.g.\ \character{"}) preserve the literal
+      value of all characters within the quotes, with the exception of
+      the characters mentioned in \member{escape}. The escape characters
+      retain their special meaning only when followed by the quote in use,
+      or the escape character itself. Otherwise the escape character
+      will be considered a normal character.
+\item EOF is signaled with a \code{None} value;
+\item Quoted empty strings (\code{""}) are allowed.
+\end{itemize}
+