|
From: <mi...@us...> - 2011-12-14 23:53:45
|
Revision: 7256
http://docutils.svn.sourceforge.net/docutils/?rev=7256&view=rev
Author: milde
Date: 2011-12-14 23:53:38 +0000 (Wed, 14 Dec 2011)
Log Message:
-----------
Clean up record_dependencies feature.
Use utf8 encoding for the record file: simple, failsafe and
reproducible way for portable storage of non-ASCII filenames
(cf. http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html).
Drop latex2e exception: Record only files required to generate the LaTeX
source.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docs/user/config.txt
trunk/docutils/docutils/parsers/rst/directives/images.py
trunk/docutils/docutils/parsers/rst/directives/misc.py
trunk/docutils/docutils/utils.py
trunk/docutils/docutils/writers/html4css1/__init__.py
trunk/docutils/docutils/writers/latex2e/__init__.py
trunk/docutils/test/data/dependencies.txt
trunk/docutils/test/test_dependencies.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2011-12-14 19:14:33 UTC (rev 7255)
+++ trunk/docutils/HISTORY.txt 2011-12-14 23:53:38 UTC (rev 7256)
@@ -32,8 +32,8 @@
* docutils/utils.py
- - DependencyList uses io.FileOutput to prevent errors recording
- non-ASCII filenames (fixes [ 3434355 ].
+ - DependencyList uses io.FileOutput and 'utf8' encoding to prevent
+ errors recording non-ASCII filenames (fixes [ 3434355 ]).
* docutils/parsers/rst/states.py
@@ -48,6 +48,7 @@
* docutils/writers/latex2e/__init__.py
- Support the `abbreviation` and `acronym` standard roles.
+ - Record only files required to generate the LaTeX source as dependencies.
* docutils/writers/html4css1/__init__.py
Modified: trunk/docutils/docs/user/config.txt
===================================================================
--- trunk/docutils/docs/user/config.txt 2011-12-14 19:14:33 UTC (rev 7255)
+++ trunk/docutils/docs/user/config.txt 2011-12-14 23:53:38 UTC (rev 7256)
@@ -387,14 +387,24 @@
--output-encoding, -o``.
_`record_dependencies`
- Path to a file where Docutils will write a list of files that the
- input and output depend on [#dependencies]_, e.g. due to file
- inclusion. [#pwd]_ The format is one filename per line. This
- option is particularly useful in conjunction with programs like
- ``make``.
+ Path to a file where Docutils will write a list of files that were
+ required to generate the output, e.g. included files or embedded
+ stylesheets [#dependencies]_. [#pwd]_ The format is one path per
+ line with forward slashes as separator, the encoding is ``utf8``.
Set to ``-`` in order to write dependencies to stdout.
+ This option is particularly useful in conjunction with programs like
+ ``make`` using ``Makefile`` rules like::
+
+ ham.html: ham.txt $(shell cat hamdeps.txt)
+ rst2html.py --record-dependencies=hamdeps.txt ham.txt ham.html
+
+ If the filesystem encoding differs from utf8, replace the ``cat``
+ command with a call to a converter, e.g.::
+
+ $(shell iconv -f utf8 -t latin1 hamdeps.txt)
+
Default: None. Option: ``--record-dependencies``.
_`report_level`
@@ -1436,20 +1446,9 @@
do the overriding explicitly, by assigning ``None`` to the other
settings.
-.. [#dependencies] Some notes on the dependency recorder:
+.. [#dependencies] Images are only added to the dependency list if the
+ reStructuredText parser extracted image dimensions from the file.
- * Images are only added to the dependency list if the
- reStructuredText parser extracted image dimensions from the file.
-
- * Stylesheets are only added if they are embedded.
-
- * For practical reasons, the output of the LaTeX writer is
- considered merely an *intermediate* processing stage. The
- dependency recorder records all files the *rendered* file
- (e.g. in PDF or DVI format) depends on. Thus, images and
- stylesheets are both unconditionally recorded as dependencies
- when using the LaTeX writer.
-
.. [#footnote_space] The footnote space is trimmed if the reference
style is "superscript", and it is left if the reference style is
"brackets".
Modified: trunk/docutils/docutils/parsers/rst/directives/images.py
===================================================================
--- trunk/docutils/docutils/parsers/rst/directives/images.py 2011-12-14 19:14:33 UTC (rev 7255)
+++ trunk/docutils/docutils/parsers/rst/directives/images.py 2011-12-14 23:53:38 UTC (rev 7256)
@@ -10,18 +10,22 @@
import sys
+import urllib
from docutils import nodes, utils
from docutils.parsers.rst import Directive
from docutils.parsers.rst import directives, states
from docutils.nodes import fully_normalize_name, whitespace_normalize_name
from docutils.parsers.rst.roles import set_classes
-
-try:
- import Image as PIL # PIL
+try: # check for the Python Imaging Library
+ import PIL
except ImportError:
- PIL = None
+ try: # sometimes PIL modules are put in PYTHONPATH's root
+ import Image
+ class PIL(object): pass # dummy wrapper
+ PIL.Image = Image
+ except ImportError:
+ PIL = None
-
class Image(Directive):
align_h_values = ('left', 'center', 'right')
@@ -121,15 +125,17 @@
figure_node = nodes.figure('', image_node)
if figwidth == 'image':
if PIL and self.state.document.settings.file_insertion_enabled:
- # PIL doesn't like Unicode paths:
+ imagepath = urllib.url2pathname(image_node['uri'])
try:
- i = PIL.open(str(image_node['uri']))
- except (IOError, UnicodeError):
- pass
+ img = PIL.Image.open(
+ imagepath.encode(sys.getfilesystemencoding()))
+ except (IOError, UnicodeEncodeError):
+ pass # TODO: warn?
else:
self.state.document.settings.record_dependencies.add(
- image_node['uri'])
- figure_node['width'] = i.size[0]
+ imagepath.replace('\\', '/'))
+ figure_node['width'] = img.size[0]
+ del img
elif figwidth is not None:
figure_node['width'] = figwidth
if figclasses:
Modified: trunk/docutils/docutils/parsers/rst/directives/misc.py
===================================================================
--- trunk/docutils/docutils/parsers/rst/directives/misc.py 2011-12-14 19:14:33 UTC (rev 7255)
+++ trunk/docutils/docutils/parsers/rst/directives/misc.py 2011-12-14 23:53:38 UTC (rev 7256)
@@ -198,12 +198,14 @@
self.options['file']))
path = utils.relative_path(None, path)
try:
- self.state.document.settings.record_dependencies.add(path)
raw_file = io.FileInput(
source_path=path, encoding=encoding,
error_handler=(self.state.document.settings.\
input_encoding_error_handler),
handle_io_errors=None)
+ # TODO: currently, raw input files are recorded as
+ # dependencies even if not used for the chosen output format.
+ self.state.document.settings.record_dependencies.add(path)
except IOError, error:
raise self.severe(u'Problems with "%s" directive path:\n%s.'
% (self.name, ErrorString(error)))
Modified: trunk/docutils/docutils/utils.py
===================================================================
--- trunk/docutils/docutils/utils.py 2011-12-14 19:14:33 UTC (rev 7255)
+++ trunk/docutils/docutils/utils.py 2011-12-14 23:53:38 UTC (rev 7256)
@@ -662,7 +662,7 @@
return taglist
-class DependencyList:
+class DependencyList(object):
"""
List of dependencies, with file recording support.
@@ -699,9 +699,7 @@
else:
of = output_file
self.file = FileOutput(destination_path=of,
- encoding=sys.getfilesystemencoding(),
- error_handler='xmlcharrefreplace',
- autoclose=False)
+ encoding='utf8', autoclose=False)
else:
self.file = None
@@ -725,8 +723,8 @@
self.file = None
def __repr__(self):
- if self.file:
+ try:
output_file = self.file.name
- else:
+ except AttributeError:
output_file = None
return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)
Modified: trunk/docutils/docutils/writers/html4css1/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/html4css1/__init__.py 2011-12-14 19:14:33 UTC (rev 7255)
+++ trunk/docutils/docutils/writers/html4css1/__init__.py 2011-12-14 23:53:38 UTC (rev 7256)
@@ -19,10 +19,16 @@
import os.path
import time
import re
-try:
- import Image # check for the Python Imaging Library
+import urllib
+try: # check for the Python Imaging Library
+ import PIL
except ImportError:
- Image = None
+ try: # sometimes PIL modules are put in PYTHONPATH's root
+ import Image
+ class PIL(object): pass # dummy wrapper
+ PIL.Image = Image
+ except ImportError:
+ PIL = None
import docutils
from docutils import frontend, nodes, utils, writers, languages, io
from docutils.transforms import writer_aux
@@ -286,10 +292,10 @@
styles = [utils.relative_path(settings._destination, sheet)
for sheet in styles]
if settings.embed_stylesheet:
- settings.record_dependencies.add(*styles)
self.stylesheet = [self.embedded_stylesheet %
io.FileInput(source_path=sheet, encoding='utf-8').read()
for sheet in styles]
+ settings.record_dependencies.add(*styles)
else: # link to stylesheets
self.stylesheet = [self.stylesheet_link % self.encode(stylesheet)
for stylesheet in styles]
@@ -1006,18 +1012,22 @@
if 'height' in node:
atts['height'] = node['height']
if 'scale' in node:
- if Image and not ('width' in node and 'height' in node):
+ if (PIL and not ('width' in node and 'height' in node)
+ and self.settings.file_insertion_enabled):
+ imagepath = urllib.url2pathname(uri)
try:
- im = Image.open(str(uri))
- except (IOError, # Source image can't be found or opened
- UnicodeError): # PIL doesn't like Unicode paths.
- pass
+ img = PIL.Image.open(
+ imagepath.encode(sys.getfilesystemencoding()))
+ except (IOError, UnicodeEncodeError):
+ pass # TODO: warn?
else:
+ self.settings.record_dependencies.add(
+ imagepath.replace('\\', '/'))
if 'width' not in atts:
- atts['width'] = str(im.size[0])
+ atts['width'] = str(img.size[0])
if 'height' not in atts:
- atts['height'] = str(im.size[1])
- del im
+ atts['height'] = str(img.size[1])
+ del img
for att_name in 'width', 'height':
if att_name in atts:
match = re.match(r'([0-9.]+)(\S*)$', atts[att_name])
Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py 2011-12-14 19:14:33 UTC (rev 7255)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py 2011-12-14 23:53:38 UTC (rev 7256)
@@ -1284,6 +1284,8 @@
# Unicode chars that are not recognized by LaTeX's utf8 encoding
unsupported_unicode_chars = {
0x00A0: ur'~', # NO-BREAK SPACE
+ # TODO: ensure white space also at the beginning of a line?
+ # 0x00A0: ur'\leavevmode\nobreak\vadjust{}~'
0x00AD: ur'\-', # SOFT HYPHEN
#
0x2008: ur'\,', # PUNCTUATION SPACE
@@ -2225,9 +2227,8 @@
def visit_image(self, node):
self.requirements['graphicx'] = self.graphicx_package
attrs = node.attributes
- # Convert image URI to a local file path and add to dependency list
+ # Convert image URI to a local file path
imagepath = urllib.url2pathname(attrs['uri']).replace('\\', '/')
- self.settings.record_dependencies.add(imagepath)
# alignment defaults:
if not 'align' in attrs:
# Set default align of image in a figure to 'center'
Modified: trunk/docutils/test/data/dependencies.txt
===================================================================
--- trunk/docutils/test/data/dependencies.txt 2011-12-14 19:14:33 UTC (rev 7255)
+++ trunk/docutils/test/data/dependencies.txt 2011-12-14 23:53:38 UTC (rev 7256)
@@ -1,10 +1,29 @@
-.. image:: some_image.png
+Test input for test_dependencies.
+Docutils can write a list of files required to generate the output like
+included files or embedded stylesheets. This is particularly useful in
+conjunction with programs like ``make``.
+
+Included files are recorded:
+
.. include:: include.txt
.. raw:: HTML
:file: raw.txt
+Dependencies are recorded only once:
+
.. include:: include.txt
-.. image:: картина.jpg
+Image files are only recorded if actually accessed
+(to extract the size or if embedded in the output document):
+
+.. image:: test.jpg
+
+.. figure:: ../docs/user/rst/images/title.png
+ :figwidth: image
+
+Scaled images without given size are recorded by the html writer:
+
+.. image:: ../docs/user/rst/images/biohazard.png
+ :scale: 50 %
Modified: trunk/docutils/test/test_dependencies.py
===================================================================
--- trunk/docutils/test/test_dependencies.py 2011-12-14 19:14:33 UTC (rev 7255)
+++ trunk/docutils/test/test_dependencies.py 2011-12-14 23:53:38 UTC (rev 7256)
@@ -15,79 +15,108 @@
import docutils.core
import docutils.utils
import docutils.io
+from docutils.parsers.rst.directives.images import PIL
+# docutils.utils.DependencyList records POSIX paths,
+# i.e. "/" as a path separator even on Windows (not os.path.join).
+paths = {'include': u'data/include.txt', # included rst file
+ 'raw': u'data/raw.txt', # included raw "HTML file"
+ 'scaled-image': u'../docs/user/rst/images/biohazard.png',
+ 'figure-image': u'../docs/user/rst/images/title.png',
+ 'stylesheet': u'data/stylesheet.txt',
+ 'default-stylesheet': u'../docutils/writers/html4css1/html4css1.css',
+ }
+
class RecordDependenciesTests(unittest.TestCase):
- # docutils.utils.DependencyList records relative URLs, not platform paths,
- # so use "/" as a path separator even on Windows (not os.path.join).
-
def get_record(self, **settings):
recordfile = 'record.txt'
+ recorder = docutils.utils.DependencyList(recordfile)
+ # (Re) create the record file by running a conversion:
settings.setdefault('source_path',
os.path.join('data', 'dependencies.txt'))
settings.setdefault('settings_overrides', {})
- settings['settings_overrides'] = settings['settings_overrides'].copy()
- settings['settings_overrides']['_disable_config'] = 1
- if 'record_dependencies' not in settings['settings_overrides']:
- settings['settings_overrides']['record_dependencies'] = \
- docutils.utils.DependencyList(recordfile)
- docutils.core.publish_file(
- destination=DocutilsTestSupport.DevNull(), **settings)
- settings['settings_overrides']['record_dependencies'].close()
+ settings['settings_overrides'].update(_disable_config=True,
+ record_dependencies=recorder)
+ docutils.core.publish_file(destination=DocutilsTestSupport.DevNull(),
+ **settings)
+ recorder.close()
+ # Read the record file:
record = docutils.io.FileInput(source_path=recordfile,
encoding='utf8')
return record.read().splitlines()
def test_dependencies(self):
- self.assertEqual(self.get_record(),
- ['data/include.txt', 'data/raw.txt'])
- self.assertEqual(self.get_record(writer_name='latex'),
- ['data/include.txt',
- 'data/raw.txt',
- # this is a URL, not a path:
- 'some_image.png',
- # cyrillic filename (testing with an image, because
- # this does not abort if the file does not exist):
- u'\u043a\u0430\u0440\u0442\u0438\u043d\u0430.jpg'])
+ # Note: currently, raw input files are read (and hence recorded) while
+ # parsing even if not used in the chosen output format.
+ # This should change (see parsers/rst/directives/misc.py).
+ keys = ['include', 'raw']
+ if PIL:
+ keys += ['figure-image']
+ expected = [paths[key] for key in keys]
+ record = self.get_record(writer_name='xml')
+ # the order of the files is arbitrary
+ record.sort()
+ expected.sort()
+ self.assertEqual(record, expected)
+ def test_dependencies_html(self):
+ keys = ['include', 'raw', 'default-stylesheet']
+ if PIL:
+ keys += ['figure-image', 'scaled-image']
+ expected = [paths[key] for key in keys]
+ record = self.get_record(writer_name='html')
+ # the order of the files is arbitrary
+ record.sort()
+ expected.sort()
+ self.assertEqual(record, expected)
+
+ def test_dependencies_latex(self):
+ # since 0.9, the latex writer records only really accessed files, too
+ # Note: currently, raw input files are read (and hence recorded) while
+ # parsing even if not used in the chosen output format.
+ # This should change (see parsers/rst/directives/misc.py).
+ keys = ['include', 'raw']
+ if PIL:
+ keys += ['figure-image']
+ expected = [paths[key] for key in keys]
+ record = self.get_record(writer_name='latex')
+ # the order of the files is arbitrary
+ record.sort()
+ expected.sort()
+ self.assertEqual(record, expected)
+
def test_csv_dependencies(self):
try:
import csv
- self.assertEqual(
- self.get_record(source_path=os.path.join('data',
- 'csv_dep.txt')),
- ['data/csv_data.txt'])
+ csvsource = os.path.join('data', 'csv_dep.txt')
+ self.assertEqual(self.get_record(source_path=csvsource),
+ ['data/csv_data.txt'])
except ImportError:
pass
def test_stylesheet_dependencies(self):
- # Parameters to publish_file.
- s = {'settings_overrides': {}}
- so = s['settings_overrides']
- so['embed_stylesheet'] = 0
- # must use '/', not os.sep or os.path.join, because of URL handling
- # (see docutils.utils.relative_path):
- stylesheet_path = 'data/stylesheet.txt'
- so['stylesheet_path'] = stylesheet_path
- so['stylesheet'] = None
- s['writer_name'] = 'html'
- record = self.get_record(**s)
- self.assert_(stylesheet_path not in record,
- '%r should not be in %r' % (stylesheet_path, record))
- so['embed_stylesheet'] = 1
- record = self.get_record(**s)
- self.assert_(stylesheet_path in record,
- '%r should be in %r' % (stylesheet_path, record))
- s['writer_name'] = 'latex'
- record = self.get_record(**s)
- self.assert_(stylesheet_path in record,
- '%r should be in %r' % (stylesheet_path, record))
- del so['embed_stylesheet']
- record = self.get_record(**s)
- self.assert_(stylesheet_path not in record,
- '%r should not be in %r' % (stylesheet_path, record))
+ stylesheet = paths['stylesheet']
+ so = {'stylesheet_path': paths['stylesheet'],
+ 'stylesheet': None}
+ so['embed_stylesheet'] = False
+ record = self.get_record(writer_name='html', settings_overrides=so)
+ self.assert_(stylesheet not in record,
+ '%r should not be in %r' % (stylesheet, record))
+ record = self.get_record(writer_name='latex', settings_overrides=so)
+ self.assert_(stylesheet not in record,
+ '%r should not be in %r' % (stylesheet, record))
+ so['embed_stylesheet'] = True
+ record = self.get_record(writer_name='html', settings_overrides=so)
+ self.assert_(stylesheet in record,
+ '%r should be in %r' % (stylesheet, record))
+ record = self.get_record(writer_name='latex', settings_overrides=so)
+ self.assert_(stylesheet in record,
+ '%r should be in %r' % (stylesheet, record))
+
+
if __name__ == '__main__':
unittest.main()
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|