From: <mi...@us...> - 2009-09-10 11:02:44
|
Author: milde Date: 2009-09-10 13:02:27 +0200 (Thu, 10 Sep 2009) New Revision: 6120 Modified: trunk/docutils/HISTORY.txt trunk/docutils/docutils/frontend.py trunk/docutils/docutils/io.py trunk/docutils/docutils/parsers/rst/directives/misc.py trunk/docutils/docutils/utils.py Log: Fix [ 2781629 ] support non-ASCII chars in file names. Modified: trunk/docutils/HISTORY.txt =================================================================== --- trunk/docutils/HISTORY.txt 2009-09-09 09:21:59 UTC (rev 6119) +++ trunk/docutils/HISTORY.txt 2009-09-10 11:02:27 UTC (rev 6120) @@ -41,6 +41,7 @@ - Apply [ 1878977 ] make_id(): deaccent characters. - Apply [ 2029251 ] return nonzero when tests fail. - Fix [ 1692788 ] allow UTF-8 in style sheets. + - Fix [ 2781629 ] support non-ASCII chars in file names. * reStructuredText: Modified: trunk/docutils/docutils/frontend.py =================================================================== --- trunk/docutils/docutils/frontend.py 2009-09-09 09:21:59 UTC (rev 6119) +++ trunk/docutils/docutils/frontend.py 2009-09-10 11:02:27 UTC (rev 6120) @@ -79,7 +79,7 @@ config_parser=None, config_section=None): try: codecs.lookup_error(value) - except AttributeError: # prior to Python 2.3 + except AttributeError: # TODO: remove (only needed prior to Python 2.3) if value not in ('strict', 'ignore', 'replace', 'xmlcharrefreplace'): raise (LookupError( 'unknown encoding error handler: "%s" (choices: ' @@ -293,6 +293,13 @@ '0': 0, 'off': 0, 'no': 0, 'false': 0, '': 0} """Lookup table for boolean configuration file settings.""" + try: + default_error_encoding = sys.stderr.encoding or 'ascii' + except AttributeError: + default_error_encoding = 'ascii' + + # TODO: variable no longer needed since 'backslashreplace' is + # part of Python >= 2.3 (required since Docutils 0.6) if hasattr(codecs, 'backslashreplace_errors'): default_error_encoding_error_handler = 'backslashreplace' else: @@ -344,11 +351,11 @@ ('Disable backlinks from footnotes and citations.', ['--no-footnote-backlinks'], {'dest': 'footnote_backlinks', 'action': 'store_false'}), - ('Enable section numbering. (default)', + ('Enable section numbering by Docutils. (default)', ['--section-numbering'], {'action': 'store_true', 'dest': 'sectnum_xform', 'default': 1, 'validator': validate_boolean}), - ('Disable section numbering.', + ('Disable section numbering by Docutils.', ['--no-section-numbering'], {'action': 'store_false', 'dest': 'sectnum_xform'}), ('Remove comment elements from the document tree.', @@ -421,14 +428,14 @@ 'validator': validate_encoding_and_error_handler}), ('Specify error handler for unencodable output characters; ' '"strict" (default), "ignore", "replace", ' - '"xmlcharrefreplace", "backslashreplace" (Python 2.3+).', + '"xmlcharrefreplace", "backslashreplace".', ['--output-encoding-error-handler'], {'default': 'strict', 'validator': validate_encoding_error_handler}), ('Specify text encoding and error handler for error output. ' - 'Default: ASCII:%s.' - % default_error_encoding_error_handler, + 'Default: %s:%s.' + % (default_error_encoding, default_error_encoding_error_handler), ['--error-encoding', '-e'], - {'metavar': '<name[:handler]>', 'default': 'ascii', + {'metavar': '<name[:handler]>', 'default': default_error_encoding, 'validator': validate_encoding_and_error_handler}), ('Specify the error handler for unencodable characters in ' 'error output. Default: %s.' Modified: trunk/docutils/docutils/io.py =================================================================== --- trunk/docutils/docutils/io.py 2009-09-09 09:21:59 UTC (rev 6119) +++ trunk/docutils/docutils/io.py 2009-09-10 11:02:27 UTC (rev 6120) @@ -231,9 +231,9 @@ raise print >>sys.stderr, '%s: %s' % (error.__class__.__name__, error) - print >>sys.stderr, ( - 'Unable to open source file for reading (%r). Exiting.' - % source_path) + print >>sys.stderr, ('Unable to open source file for ' + "reading ('%s'). Exiting." % + source_path) sys.exit(1) else: self.source = sys.stdin @@ -314,8 +314,8 @@ raise print >>sys.stderr, '%s: %s' % (error.__class__.__name__, error) - print >>sys.stderr, ('Unable to open destination file for writing ' - '(%r). Exiting.' % self.destination_path) + print >>sys.stderr, ('Unable to open destination file for writing' + " ('%s'). Exiting." % self.destination_path) sys.exit(1) self.opened = 1 @@ -349,7 +349,7 @@ print >>sys.stderr, '%s: %s' % (error.__class__.__name__, error) print >>sys.stderr, ('Unable to open destination file for writing ' - '(%r). Exiting.' % self.destination_path) + "('%s'). Exiting." % self.destination_path) sys.exit(1) self.opened = 1 Modified: trunk/docutils/docutils/parsers/rst/directives/misc.py =================================================================== --- trunk/docutils/docutils/parsers/rst/directives/misc.py 2009-09-09 09:21:59 UTC (rev 6119) +++ trunk/docutils/docutils/parsers/rst/directives/misc.py 2009-09-10 11:02:27 UTC (rev 6120) @@ -52,6 +52,7 @@ path = os.path.join(self.standard_include_path, path[1:-1]) path = os.path.normpath(os.path.join(source_dir, path)) path = utils.relative_path(None, path) + path = nodes.reprunicode(path) encoding = self.options.get( 'encoding', self.state.document.settings.input_encoding) try: Modified: trunk/docutils/docutils/utils.py =================================================================== --- trunk/docutils/docutils/utils.py 2009-09-09 09:21:59 UTC (rev 6119) +++ trunk/docutils/docutils/utils.py 2009-09-10 11:02:27 UTC (rev 6120) @@ -70,7 +70,7 @@ SEVERE_LEVEL) = range(5) def __init__(self, source, report_level, halt_level, stream=None, - debug=0, encoding='ascii', error_handler='replace'): + debug=0, encoding=None, error_handler='replace'): """ :Parameters: - `source`: The path to or description of the source data. @@ -83,16 +83,13 @@ ``.write`` method), a string (file name, opened for writing), '' (empty string, for discarding all stream messages) or `None` (implies `sys.stderr`; default). - - `encoding`: The encoding for stderr output. + - `encoding`: The output encoding. - `error_handler`: The error handler for stderr output encoding. """ self.source = source """The path to or description of the source data.""" - self.encoding = encoding - """The character encoding for the stderr output.""" - self.error_handler = error_handler """The character encoding error handler.""" @@ -120,6 +117,15 @@ self.stream = stream """Where warning output is sent.""" + if encoding is None: + try: + encoding = stream.encoding + except AttributeError: + pass + + self.encoding = encoding or 'ascii' + """The output character encoding.""" + self.observers = [] """List of bound methods or functions to call with each system_message created.""" @@ -322,6 +328,25 @@ class NameValueError(DataError): pass +def decode_path(path): + """ + Decode file/path string. Return `nodes.reprunicode` object. + + Provides a conversion to unicode without the UnicodeDecode error of the + implicit 'ascii:strict' decoding. + """ + # see also http://article.gmane.org/gmane.text.docutils.user/2905 + try: + path = path.decode(sys.getfilesystemencoding(), 'strict') + except UnicodeDecodeError: + path = path.decode('utf-8', 'strict') + try: + path = path.decode(sys.getfilesystemencoding(), 'strict') + except UnicodeDecodeError: + path = path.decode('ascii', 'replace') + return nodes.reprunicode(path) + + def extract_name_value(line): """ Return a list of (name, value) from a line of the form "name=value ...". @@ -396,6 +421,7 @@ from docutils import frontend if settings is None: settings = frontend.OptionParser().get_default_values() + source_path = decode_path(source_path) reporter = new_reporter(source_path, settings) document = nodes.document(settings, reporter, source=source_path) document.note_source(source_path, -1) |