From: <mi...@us...> - 2011-12-14 23:53:45
|
Revision: 7256 http://docutils.svn.sourceforge.net/docutils/?rev=7256&view=rev Author: milde Date: 2011-12-14 23:53:38 +0000 (Wed, 14 Dec 2011) Log Message: ----------- Clean up record_dependencies feature. Use utf8 encoding for the record file: simple, failsafe and reproducible way for portable storage of non-ASCII filenames (cf. http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html). Drop latex2e exception: Record only files required to generate the LaTeX source. Modified Paths: -------------- trunk/docutils/HISTORY.txt trunk/docutils/docs/user/config.txt trunk/docutils/docutils/parsers/rst/directives/images.py trunk/docutils/docutils/parsers/rst/directives/misc.py trunk/docutils/docutils/utils.py trunk/docutils/docutils/writers/html4css1/__init__.py trunk/docutils/docutils/writers/latex2e/__init__.py trunk/docutils/test/data/dependencies.txt trunk/docutils/test/test_dependencies.py Modified: trunk/docutils/HISTORY.txt =================================================================== --- trunk/docutils/HISTORY.txt 2011-12-14 19:14:33 UTC (rev 7255) +++ trunk/docutils/HISTORY.txt 2011-12-14 23:53:38 UTC (rev 7256) @@ -32,8 +32,8 @@ * docutils/utils.py - - DependencyList uses io.FileOutput to prevent errors recording - non-ASCII filenames (fixes [ 3434355 ]. + - DependencyList uses io.FileOutput and 'utf8' encoding to prevent + errors recording non-ASCII filenames (fixes [ 3434355 ]. * docutils/parsers/rst/states.py @@ -48,6 +48,7 @@ * docutils/writers/latex2e/__init__.py - Support the `abbreviation` and `acronym` standard roles. + - Record only files required to generate the LaTeX source as dependencies. * docutils/writers/html4css1/__init__.py Modified: trunk/docutils/docs/user/config.txt =================================================================== --- trunk/docutils/docs/user/config.txt 2011-12-14 19:14:33 UTC (rev 7255) +++ trunk/docutils/docs/user/config.txt 2011-12-14 23:53:38 UTC (rev 7256) @@ -387,14 +387,24 @@ --output-encoding, -o``. 
_`record_dependencies` - Path to a file where Docutils will write a list of files that the - input and output depend on [#dependencies]_, e.g. due to file - inclusion. [#pwd]_ The format is one filename per line. This - option is particularly useful in conjunction with programs like - ``make``. + Path to a file where Docutils will write a list of files that were + required to generate the output, e.g. included files or embedded + stylesheets [#dependencies]_. [#pwd]_ The format is one path per + line with forward slashes as separator, the encoding is ``utf8``. Set to ``-`` in order to write dependencies to stdout. + This option is particularly useful in conjunction with programs like + ``make`` using ``Makefile`` rules like:: + + ham.html: ham.txt $(shell cat hamdeps.txt) + rst2html.py --record-dependencies=hamdeps.txt ham.txt ham.html + + If the filesystem encoding differs from utf8, replace the ``cat`` + command with a call to a converter, e.g.:: + + $(shell iconv -f utf8 -t latin1 hamdeps.txt) + Default: None. Option: ``--record-dependencies``. _`report_level` @@ -1436,20 +1446,9 @@ do the overriding explicitly, by assigning ``None`` to the other settings. -.. [#dependencies] Some notes on the dependency recorder: +.. [#dependencies] Images are only added to the dependency list if the + reStructuredText parser extracted image dimensions from the file. - * Images are only added to the dependency list if the - reStructuredText parser extracted image dimensions from the file. - - * Stylesheets are only added if they are embedded. - - * For practical reasons, the output of the LaTeX writer is - considered merely an *intermediate* processing stage. The - dependency recorder records all files the *rendered* file - (e.g. in PDF or DVI format) depends on. Thus, images and - stylesheets are both unconditionally recorded as dependencies - when using the LaTeX writer. - .. 
[#footnote_space] The footnote space is trimmed if the reference style is "superscript", and it is left if the reference style is "brackets". Modified: trunk/docutils/docutils/parsers/rst/directives/images.py =================================================================== --- trunk/docutils/docutils/parsers/rst/directives/images.py 2011-12-14 19:14:33 UTC (rev 7255) +++ trunk/docutils/docutils/parsers/rst/directives/images.py 2011-12-14 23:53:38 UTC (rev 7256) @@ -10,18 +10,22 @@ import sys +import urllib from docutils import nodes, utils from docutils.parsers.rst import Directive from docutils.parsers.rst import directives, states from docutils.nodes import fully_normalize_name, whitespace_normalize_name from docutils.parsers.rst.roles import set_classes - -try: - import Image as PIL # PIL +try: # check for the Python Imaging Library + import PIL except ImportError: - PIL = None + try: # sometimes PIL modules are put in PYTHONPATH's root + import Image + class PIL(object): pass # dummy wrapper + PIL.Image = Image + except ImportError: + PIL = None - class Image(Directive): align_h_values = ('left', 'center', 'right') @@ -121,15 +125,17 @@ figure_node = nodes.figure('', image_node) if figwidth == 'image': if PIL and self.state.document.settings.file_insertion_enabled: - # PIL doesn't like Unicode paths: + imagepath = urllib.url2pathname(image_node['uri']) try: - i = PIL.open(str(image_node['uri'])) - except (IOError, UnicodeError): - pass + img = PIL.Image.open( + imagepath.encode(sys.getfilesystemencoding())) + except (IOError, UnicodeEncodeError): + pass # TODO: warn? 
else: self.state.document.settings.record_dependencies.add( - image_node['uri']) - figure_node['width'] = i.size[0] + imagepath.replace('\\', '/')) + figure_node['width'] = img.size[0] + del img elif figwidth is not None: figure_node['width'] = figwidth if figclasses: Modified: trunk/docutils/docutils/parsers/rst/directives/misc.py =================================================================== --- trunk/docutils/docutils/parsers/rst/directives/misc.py 2011-12-14 19:14:33 UTC (rev 7255) +++ trunk/docutils/docutils/parsers/rst/directives/misc.py 2011-12-14 23:53:38 UTC (rev 7256) @@ -198,12 +198,14 @@ self.options['file'])) path = utils.relative_path(None, path) try: - self.state.document.settings.record_dependencies.add(path) raw_file = io.FileInput( source_path=path, encoding=encoding, error_handler=(self.state.document.settings.\ input_encoding_error_handler), handle_io_errors=None) + # TODO: currently, raw input files are recorded as + # dependencies even if not used for the chosen output format. + self.state.document.settings.record_dependencies.add(path) except IOError, error: raise self.severe(u'Problems with "%s" directive path:\n%s.' % (self.name, ErrorString(error))) Modified: trunk/docutils/docutils/utils.py =================================================================== --- trunk/docutils/docutils/utils.py 2011-12-14 19:14:33 UTC (rev 7255) +++ trunk/docutils/docutils/utils.py 2011-12-14 23:53:38 UTC (rev 7256) @@ -662,7 +662,7 @@ return taglist -class DependencyList: +class DependencyList(object): """ List of dependencies, with file recording support. 
@@ -699,9 +699,7 @@ else: of = output_file self.file = FileOutput(destination_path=of, - encoding=sys.getfilesystemencoding(), - error_handler='xmlcharrefreplace', - autoclose=False) + encoding='utf8', autoclose=False) else: self.file = None @@ -725,8 +723,8 @@ self.file = None def __repr__(self): - if self.file: + try: output_file = self.file.name - else: + except AttributeError: output_file = None return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list) Modified: trunk/docutils/docutils/writers/html4css1/__init__.py =================================================================== --- trunk/docutils/docutils/writers/html4css1/__init__.py 2011-12-14 19:14:33 UTC (rev 7255) +++ trunk/docutils/docutils/writers/html4css1/__init__.py 2011-12-14 23:53:38 UTC (rev 7256) @@ -19,10 +19,16 @@ import os.path import time import re -try: - import Image # check for the Python Imaging Library +import urllib +try: # check for the Python Imaging Library + import PIL except ImportError: - Image = None + try: # sometimes PIL modules are put in PYTHONPATH's root + import Image + class PIL(object): pass # dummy wrapper + PIL.Image = Image + except ImportError: + PIL = None import docutils from docutils import frontend, nodes, utils, writers, languages, io from docutils.transforms import writer_aux @@ -286,10 +292,10 @@ styles = [utils.relative_path(settings._destination, sheet) for sheet in styles] if settings.embed_stylesheet: - settings.record_dependencies.add(*styles) self.stylesheet = [self.embedded_stylesheet % io.FileInput(source_path=sheet, encoding='utf-8').read() for sheet in styles] + settings.record_dependencies.add(*styles) else: # link to stylesheets self.stylesheet = [self.stylesheet_link % self.encode(stylesheet) for stylesheet in styles] @@ -1006,18 +1012,22 @@ if 'height' in node: atts['height'] = node['height'] if 'scale' in node: - if Image and not ('width' in node and 'height' in node): + if (PIL and not ('width' in node and 'height' in node) + 
and self.settings.file_insertion_enabled): + imagepath = urllib.url2pathname(uri) try: - im = Image.open(str(uri)) - except (IOError, # Source image can't be found or opened - UnicodeError): # PIL doesn't like Unicode paths. - pass + img = PIL.Image.open( + imagepath.encode(sys.getfilesystemencoding())) + except (IOError, UnicodeEncodeError): + pass # TODO: warn? else: + self.settings.record_dependencies.add( + imagepath.replace('\\', '/')) if 'width' not in atts: - atts['width'] = str(im.size[0]) + atts['width'] = str(img.size[0]) if 'height' not in atts: - atts['height'] = str(im.size[1]) - del im + atts['height'] = str(img.size[1]) + del img for att_name in 'width', 'height': if att_name in atts: match = re.match(r'([0-9.]+)(\S*)$', atts[att_name]) Modified: trunk/docutils/docutils/writers/latex2e/__init__.py =================================================================== --- trunk/docutils/docutils/writers/latex2e/__init__.py 2011-12-14 19:14:33 UTC (rev 7255) +++ trunk/docutils/docutils/writers/latex2e/__init__.py 2011-12-14 23:53:38 UTC (rev 7256) @@ -1284,6 +1284,8 @@ # Unicode chars that are not recognized by LaTeX's utf8 encoding unsupported_unicode_chars = { 0x00A0: ur'~', # NO-BREAK SPACE + # TODO: ensure white space also at the beginning of a line? 
+ # 0x00A0: ur'\leavevmode\nobreak\vadjust{}~' 0x00AD: ur'\-', # SOFT HYPHEN # 0x2008: ur'\,', # PUNCTUATION SPACE @@ -2225,9 +2227,8 @@ def visit_image(self, node): self.requirements['graphicx'] = self.graphicx_package attrs = node.attributes - # Convert image URI to a local file path and add to dependency list + # Convert image URI to a local file path imagepath = urllib.url2pathname(attrs['uri']).replace('\\', '/') - self.settings.record_dependencies.add(imagepath) # alignment defaults: if not 'align' in attrs: # Set default align of image in a figure to 'center' Modified: trunk/docutils/test/data/dependencies.txt =================================================================== --- trunk/docutils/test/data/dependencies.txt 2011-12-14 19:14:33 UTC (rev 7255) +++ trunk/docutils/test/data/dependencies.txt 2011-12-14 23:53:38 UTC (rev 7256) @@ -1,10 +1,29 @@ -.. image:: some_image.png +Test input for test_dependencies. +Docutils can write a list of files required to generate the output like +included files or embedded stylesheets. This is particularly useful in +conjunction with programs like ``make``. + +Included files are recorded: + .. include:: include.txt .. raw:: HTML :file: raw.txt +Dependencies are recorded only once: + .. include:: include.txt -.. image:: картина.jpg +Image files are only recorded, if actually accessed +(to extract the size or if embedded in the output document): + +.. image:: test.jpg + +.. figure:: ../docs/user/rst/images/title.png + :figwidth: image + +Scaled images without given size are recorded by the html writer: + +.. 
image:: ../docs/user/rst/images/biohazard.png + :scale: 50 % Modified: trunk/docutils/test/test_dependencies.py =================================================================== --- trunk/docutils/test/test_dependencies.py 2011-12-14 19:14:33 UTC (rev 7255) +++ trunk/docutils/test/test_dependencies.py 2011-12-14 23:53:38 UTC (rev 7256) @@ -15,79 +15,108 @@ import docutils.core import docutils.utils import docutils.io +from docutils.parsers.rst.directives.images import PIL +# docutils.utils.DependencyList records POSIX paths, +# i.e. "/" as a path separator even on Windows (not os.path.join). +paths = {'include': u'data/include.txt', # included rst file + 'raw': u'data/raw.txt', # included raw "HTML file" + 'scaled-image': u'../docs/user/rst/images/biohazard.png', + 'figure-image': u'../docs/user/rst/images/title.png', + 'stylesheet': u'data/stylesheet.txt', + 'default-stylesheet': u'../docutils/writers/html4css1/html4css1.css', + } + class RecordDependenciesTests(unittest.TestCase): - # docutils.utils.DependencyList records relative URLs, not platform paths, - # so use "/" as a path separator even on Windows (not os.path.join). 
- def get_record(self, **settings): recordfile = 'record.txt' + recorder = docutils.utils.DependencyList(recordfile) + # (Re) create the record file by running a conversion: settings.setdefault('source_path', os.path.join('data', 'dependencies.txt')) settings.setdefault('settings_overrides', {}) - settings['settings_overrides'] = settings['settings_overrides'].copy() - settings['settings_overrides']['_disable_config'] = 1 - if 'record_dependencies' not in settings['settings_overrides']: - settings['settings_overrides']['record_dependencies'] = \ - docutils.utils.DependencyList(recordfile) - docutils.core.publish_file( - destination=DocutilsTestSupport.DevNull(), **settings) - settings['settings_overrides']['record_dependencies'].close() + settings['settings_overrides'].update(_disable_config=True, + record_dependencies=recorder) + docutils.core.publish_file(destination=DocutilsTestSupport.DevNull(), + **settings) + recorder.close() + # Read the record file: record = docutils.io.FileInput(source_path=recordfile, encoding='utf8') return record.read().splitlines() def test_dependencies(self): - self.assertEqual(self.get_record(), - ['data/include.txt', 'data/raw.txt']) - self.assertEqual(self.get_record(writer_name='latex'), - ['data/include.txt', - 'data/raw.txt', - # this is a URL, not a path: - 'some_image.png', - # cyrillic filename (testing with an image, because - # this does not abort if the file does not exist): - u'\u043a\u0430\u0440\u0442\u0438\u043d\u0430.jpg']) + # Note: currently, raw input files are read (and hence recorded) while + # parsing even if not used in the chosen output format. + # This should change (see parsers/rst/directives/misc.py). 
+ keys = ['include', 'raw'] + if PIL: + keys += ['figure-image'] + expected = [paths[key] for key in keys] + record = self.get_record(writer_name='xml') + # the order of the files is arbitrary + record.sort() + expected.sort() + self.assertEqual(record, expected) + def test_dependencies_html(self): + keys = ['include', 'raw', 'default-stylesheet'] + if PIL: + keys += ['figure-image', 'scaled-image'] + expected = [paths[key] for key in keys] + record = self.get_record(writer_name='html') + # the order of the files is arbitrary + record.sort() + expected.sort() + self.assertEqual(record, expected) + + def test_dependencies_latex(self): + # since 0.9, the latex writer records only really accessed files, too + # Note: currently, raw input files are read (and hence recorded) while + # parsing even if not used in the chosen output format. + # This should change (see parsers/rst/directives/misc.py). + keys = ['include', 'raw'] + if PIL: + keys += ['figure-image'] + expected = [paths[key] for key in keys] + record = self.get_record(writer_name='latex') + # the order of the files is arbitrary + record.sort() + expected.sort() + self.assertEqual(record, expected) + def test_csv_dependencies(self): try: import csv - self.assertEqual( - self.get_record(source_path=os.path.join('data', - 'csv_dep.txt')), - ['data/csv_data.txt']) + csvsource = os.path.join('data', 'csv_dep.txt') + self.assertEqual(self.get_record(source_path=csvsource), + ['data/csv_data.txt']) except ImportError: pass def test_stylesheet_dependencies(self): - # Parameters to publish_file. 
- s = {'settings_overrides': {}} - so = s['settings_overrides'] - so['embed_stylesheet'] = 0 - # must use '/', not os.sep or os.path.join, because of URL handling - # (see docutils.utils.relative_path): - stylesheet_path = 'data/stylesheet.txt' - so['stylesheet_path'] = stylesheet_path - so['stylesheet'] = None - s['writer_name'] = 'html' - record = self.get_record(**s) - self.assert_(stylesheet_path not in record, - '%r should not be in %r' % (stylesheet_path, record)) - so['embed_stylesheet'] = 1 - record = self.get_record(**s) - self.assert_(stylesheet_path in record, - '%r should be in %r' % (stylesheet_path, record)) - s['writer_name'] = 'latex' - record = self.get_record(**s) - self.assert_(stylesheet_path in record, - '%r should be in %r' % (stylesheet_path, record)) - del so['embed_stylesheet'] - record = self.get_record(**s) - self.assert_(stylesheet_path not in record, - '%r should not be in %r' % (stylesheet_path, record)) + stylesheet = paths['stylesheet'] + so = {'stylesheet_path': paths['stylesheet'], + 'stylesheet': None} + so['embed_stylesheet'] = False + record = self.get_record(writer_name='html', settings_overrides=so) + self.assert_(stylesheet not in record, + '%r should not be in %r' % (stylesheet, record)) + record = self.get_record(writer_name='latex', settings_overrides=so) + self.assert_(stylesheet not in record, + '%r should not be in %r' % (stylesheet, record)) + so['embed_stylesheet'] = True + record = self.get_record(writer_name='html', settings_overrides=so) + self.assert_(stylesheet in record, + '%r should be in %r' % (stylesheet, record)) + record = self.get_record(writer_name='latex', settings_overrides=so) + self.assert_(stylesheet in record, + '%r should be in %r' % (stylesheet, record)) + + if __name__ == '__main__': unittest.main() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |