|
From: <go...@us...> - 2016-12-16 03:45:10
|
Revision: 7998
http://sourceforge.net/p/docutils/code/7998
Author: goodger
Date: 2016-12-16 03:45:07 +0000 (Fri, 16 Dec 2016)
Log Message:
-----------
Added functionality (plus tests & docs): escaped whitespace in URI contexts.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/RELEASE-NOTES.txt
trunk/docutils/docs/ref/rst/restructuredtext.txt
trunk/docutils/docutils/parsers/rst/states.py
trunk/docutils/docutils/utils/__init__.py
trunk/docutils/test/test_parsers/test_rst/test_inline_markup.py
trunk/docutils/test/test_parsers/test_rst/test_targets.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/HISTORY.txt 2016-12-16 03:45:07 UTC (rev 7998)
@@ -13,11 +13,28 @@
.. contents::
+
Changes Since 0.13.1
====================
+* docs/ref/rst/restructuredtext.txt:
+
+ - Added documentation for escaped whitespace in URI contexts.
+
+* docutils/parsers/rst/states.py:
+
+ - Added functionality: escaped whitespace in URI contexts.
+
+* docutils/utils/__init__.py:
+
+ - Added ``split_escaped_whitespace`` function, support for escaped
+ whitespace in URI contexts.
+
+* tools/
+
- New front-end ``rst2html4.py``.
+
Release 0.13.1 (2016-12-09)
===========================
Modified: trunk/docutils/RELEASE-NOTES.txt
===================================================================
--- trunk/docutils/RELEASE-NOTES.txt 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/RELEASE-NOTES.txt 2016-12-16 03:45:07 UTC (rev 7998)
@@ -48,6 +48,12 @@
Changes Since 0.13.1
====================
+* docutils/parsers/rst/:
+
+ - Added functionality: escaped whitespace in URI contexts.
+
+* tools/
+
- New front-end ``rst2html4.py``.
Modified: trunk/docutils/docs/ref/rst/restructuredtext.txt
===================================================================
--- trunk/docutils/docs/ref/rst/restructuredtext.txt 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/docs/ref/rst/restructuredtext.txt 2016-12-16 03:45:07 UTC (rev 7998)
@@ -347,7 +347,8 @@
the markup. In reStructuredText we use the backslash, commonly used
as an escaping character in other domains.
-A backslash followed by any character (except whitespace characters)
+A backslash followed by any character (except whitespace characters
+in non-URI contexts)
escapes that character. The escaped character represents the
character itself, and is prevented from playing a role in any markup
interpretation. The backslash is removed from the output. A literal
@@ -355,9 +356,12 @@
backslash "escapes" the second, preventing it being interpreted in an
"escaping" role).
-Backslash-escaped whitespace characters are removed from the document.
+In non-URI contexts,
+backslash-escaped whitespace characters are removed from the document.
This allows for character-level `inline markup`_.
+In URIs, backslash-escaped whitespace represents a single space.
+
There are two contexts in which backslashes have no special meaning:
literal blocks and inline literals. In these contexts, a single
backslash represents a literal backslash, without having to double up.
@@ -1881,7 +1885,7 @@
explicit markup start and target name, or it may begin in an
indented text block immediately following, with no intervening
blank lines. If there are multiple lines in the link block, they
- are concatenated. Any whitespace is removed (whitespace is
+ are concatenated. Any unescaped whitespace is removed (whitespace is
permitted to allow for line wrapping). The following external
hyperlink targets are equivalent::
@@ -1894,6 +1898,10 @@
http://docutils.
sourceforge.net/rst.html
+ Escaped whitespace is preserved as intentional spaces, e.g.::
+
+ .. _reference: ../local\ path\ with\ spaces.html
+
If an external hyperlink target's URI contains an underscore as its
last character, it must be escaped to avoid being mistaken for an
indirect hyperlink target::
Modified: trunk/docutils/docutils/parsers/rst/states.py
===================================================================
--- trunk/docutils/docutils/parsers/rst/states.py 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/docutils/parsers/rst/states.py 2016-12-16 03:45:07 UTC (rev 7998)
@@ -117,6 +117,7 @@
from docutils.parsers.rst.languages import en as _fallback_language_module
from docutils.utils import escape2null, unescape, column_width
from docutils.utils import punctuation_chars, roman, urischemes
+from docutils.utils import split_escaped_whitespace
class MarkupError(DataError): pass
class UnknownInterpretedRoleError(DataError): pass
@@ -807,7 +808,9 @@
target.indirect_reference_name = aliastext[:-1]
else:
aliastype = 'uri'
- alias = ''.join(aliastext.split())
+ alias_parts = split_escaped_whitespace(match.group(2))
+ alias = ' '.join(''.join(unescape(part).split())
+ for part in alias_parts)
alias = self.adjust_uri(alias)
if alias.endswith(r'\_'):
alias = alias[:-2] + '_'
@@ -1958,8 +1961,10 @@
refname = self.is_reference(reference)
if refname:
return 'refname', refname
- reference = ''.join([''.join(line.split()) for line in block])
- return 'refuri', unescape(reference)
+ ref_parts = split_escaped_whitespace(' '.join(block))
+ reference = ' '.join(''.join(unescape(part).split())
+ for part in ref_parts)
+ return 'refuri', reference
def is_reference(self, reference):
match = self.explicit.patterns.reference.match(
Modified: trunk/docutils/docutils/utils/__init__.py
===================================================================
--- trunk/docutils/docutils/utils/__init__.py 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/docutils/utils/__init__.py 2016-12-16 03:45:07 UTC (rev 7998)
@@ -13,6 +13,7 @@
import os
import os.path
import re
+import itertools
import warnings
import unicodedata
from docutils import ApplicationError, DataError
@@ -575,7 +576,7 @@
parts.append('\x00' + text[found+1:found+2])
start = found + 2 # skip character after escape
-def unescape(text, restore_backslashes=False):
+def unescape(text, restore_backslashes=False, respect_whitespace=False):
"""
Return a string with nulls removed or restored to backslashes.
Backslash-escaped spaces are also removed.
@@ -587,6 +588,16 @@
text = ''.join(text.split(sep))
return text
+def split_escaped_whitespace(text):
+ """
+ Split `text` on escaped whitespace (null+space or null+newline).
+ Return a list of strings.
+ """
+ strings = text.split('\x00 ')
+ strings = [string.split('\x00\n') for string in strings]
+ # flatten list of lists of strings to list of strings:
+ return list(itertools.chain(*strings))
+
def strip_combining_chars(text):
if isinstance(text, str) and sys.version_info < (3,0):
return text
Modified: trunk/docutils/test/test_parsers/test_rst/test_inline_markup.py
===================================================================
--- trunk/docutils/test/test_parsers/test_rst/test_inline_markup.py 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/test/test_parsers/test_rst/test_inline_markup.py 2016-12-16 03:45:07 UTC (rev 7998)
@@ -981,6 +981,21 @@
<reference name="embedded URI with whitespace" refuri="http://example.com/long/path/and/whitespace">
embedded URI with whitespace
"""],
+[r"""
+`embedded URI with escaped whitespace <http://example.com/a\
+long/path\ and/some\ escaped\ whitespace>`__
+
+`<omitted\ reference\ text\ with\ escaped\ whitespace>`__
+""",
+"""\
+<document source="test data">
+ <paragraph>
+ <reference name="embedded URI with escaped whitespace" refuri="http://example.com/a long/path and/some escaped whitespace">
+ embedded URI with escaped whitespace
+ <paragraph>
+ <reference name="omitted reference text with escaped whitespace" refuri="omitted reference text with escaped whitespace">
+ omitted reference text with escaped whitespace
+"""],
["""\
`embedded email address <jd...@ex...>`__
@@ -1140,6 +1155,15 @@
<reference name="embedded alias with whitespace" refname="alias long phrase">
embedded alias with whitespace
"""],
+["""\
+`<embedded alias with whitespace_>`__
+""",
+"""\
+<document source="test data">
+ <paragraph>
+ <reference name="embedded alias with whitespace" refname="embedded alias with whitespace">
+ embedded alias with whitespace
+"""],
[r"""
`no embedded alias (whitespace inside bracket) < alias_ >`__
Modified: trunk/docutils/test/test_parsers/test_rst/test_targets.py
===================================================================
--- trunk/docutils/test/test_parsers/test_rst/test_targets.py 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/test/test_parsers/test_rst/test_targets.py 2016-12-16 03:45:07 UTC (rev 7998)
@@ -36,7 +36,7 @@
<document source="test data">
<target ids="optional-space-before-colon" names="optional\ space\ before\ colon">
"""],
-["""\
+[r"""
External hyperlink targets:
.. _one-liner: http://structuredtext.sourceforge.net
@@ -49,7 +49,10 @@
http://structuredtext.
sourceforge.net
-.. _not-indirect: uri\\_
+.. _escaped-whitespace: http://example.org/a\ path\ with\
+ spaces.html
+
+.. _not-indirect: uri\_
""",
"""\
<document source="test data">
@@ -58,6 +61,7 @@
<target ids="one-liner" names="one-liner" refuri="http://structuredtext.sourceforge.net">
<target ids="starts-on-this-line" names="starts-on-this-line" refuri="http://structuredtext.sourceforge.net">
<target ids="entirely-below" names="entirely-below" refuri="http://structuredtext.sourceforge.net">
+ <target ids="escaped-whitespace" names="escaped-whitespace" refuri="http://example.org/a path with spaces.html">
<target ids="not-indirect" names="not-indirect" refuri="uri_">
"""],
["""\
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2016-12-19 20:51:25
|
Revision: 8003
http://sourceforge.net/p/docutils/code/8003
Author: milde
Date: 2016-12-19 20:51:23 +0000 (Mon, 19 Dec 2016)
Log Message:
-----------
Recognize non-ASCII whitespace around inline literal, target and substitution.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/parsers/rst/states.py
trunk/docutils/test/test_parsers/test_rst/test_inline_markup.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2016-12-16 13:18:55 UTC (rev 8002)
+++ trunk/docutils/HISTORY.txt 2016-12-19 20:51:23 UTC (rev 8003)
@@ -24,6 +24,8 @@
* docutils/parsers/rst/states.py:
- Added functionality: escaped whitespace in URI contexts.
+ - Recognize non-ASCII whitespace around inline literal, target,
+ and substitution.
* docutils/utils/__init__.py:
@@ -42,10 +44,10 @@
docutils/parsers/rst/languages/fa.py
docutils/languages/la.py
docutils/parsers/rst/languages/la.py:
-
+
- Apply [ 133 ] Persian mappings by Shahin Azad.
- Apply [ 135 ] Language modules for Latvian by Alexander Smishlajev
-
+
* docutils/nodes.py
- Fix [ 253 ] Attribute key without value not allowed in XML.
@@ -93,7 +95,7 @@
* docutils/writers/_html_base.py
- New auxiliary module for definitions common to all HTML writers.
-
+
* docutils/writers/html5_polyglot/
- New HTML writer generating clean, polyglot_ markup conforming to
@@ -131,7 +133,7 @@
- Fix [ 286 ] Empty column title cause invalid latex file.
- Fix [ 224 ] Fix rowspan support for tables.
-
+
- Let LaTeX determine the column widths in tables with "colwidths-auto".
Not suited for multi-paragraph cells!
Modified: trunk/docutils/docutils/parsers/rst/states.py
===================================================================
--- trunk/docutils/docutils/parsers/rst/states.py 2016-12-16 13:18:55 UTC (rev 8002)
+++ trunk/docutils/docutils/parsers/rst/states.py 2016-12-19 20:51:23 UTC (rev 8003)
@@ -541,12 +541,12 @@
$ # end of string
""" % args, re.VERBOSE | re.UNICODE),
literal=re.compile(self.non_whitespace_before + '(``)'
- + end_string_suffix),
+ + end_string_suffix, re.UNICODE),
target=re.compile(self.non_whitespace_escape_before
- + r'(`)' + end_string_suffix),
+ + r'(`)' + end_string_suffix, re.UNICODE),
substitution_ref=re.compile(self.non_whitespace_escape_before
+ r'(\|_{0,2})'
- + end_string_suffix),
+ + end_string_suffix, re.UNICODE),
email=re.compile(self.email_pattern % args + '$',
re.VERBOSE | re.UNICODE),
uri=re.compile(
Modified: trunk/docutils/test/test_parsers/test_rst/test_inline_markup.py
===================================================================
--- trunk/docutils/test/test_parsers/test_rst/test_inline_markup.py 2016-12-16 13:18:55 UTC (rev 8002)
+++ trunk/docutils/test/test_parsers/test_rst/test_inline_markup.py 2016-12-19 20:51:23 UTC (rev 8003)
@@ -1791,6 +1791,47 @@
<emphasis>
LINE SEPARATOR
"""],
+[u"""\
+inline markup separated by non-ASCII whitespace
+\xa0**NO-BREAK SPACE**\xa0, \xa0``NO-BREAK SPACE``\xa0, \xa0`NO-BREAK SPACE`\xa0,
+\u2000**EN QUAD**\u2000, \u2000``EN QUAD``\u2000, \u2000`EN QUAD`\u2000,
+\u202f**NARROW NBSP**\u202f, \u202f``NARROW NBSP``\u202f, \u202f`NARROW NBSP`\u202f,
+""",
+u"""\
+<document source="test data">
+ <paragraph>
+ inline markup separated by non-ASCII whitespace
+ \xa0
+ <strong>
+ NO-BREAK SPACE
+ \xa0, \xa0
+ <literal>
+ NO-BREAK SPACE
+ \xa0, \xa0
+ <title_reference>
+ NO-BREAK SPACE
+ \xa0,
+ \u2000
+ <strong>
+ EN QUAD
+ \u2000, \u2000
+ <literal>
+ EN QUAD
+ \u2000, \u2000
+ <title_reference>
+ EN QUAD
+ \u2000,
+ \u202f
+ <strong>
+ NARROW NBSP
+ \u202f, \u202f
+ <literal>
+ NARROW NBSP
+ \u202f, \u202f
+ <title_reference>
+ NARROW NBSP
+ \u202f,
+"""],
# « * » ‹ * › « * » ‹ * › « * » ‹ * › French,
[u"""\
"Quoted" markup start-string (matched openers & closers) -> no markup:
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-01-03 16:14:03
|
Revision: 8010
http://sourceforge.net/p/docutils/code/8010
Author: milde
Date: 2017-01-03 16:14:00 +0000 (Tue, 03 Jan 2017)
Log Message:
-----------
improve backwards compatibility of patch [ 120 ]
Patch 120 (tables accept option widths: list of relative widths, auto or
grid) changed the API in parsers.rst.directives.tables
This patch lets get_column_widths() return one result again.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/parsers/rst/directives/tables.py
trunk/docutils/docutils/parsers/rst/states.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2017-01-02 21:11:35 UTC (rev 8009)
+++ trunk/docutils/HISTORY.txt 2017-01-03 16:14:00 UTC (rev 8010)
@@ -26,7 +26,13 @@
- Added functionality: escaped whitespace in URI contexts.
- Recognize non-ASCII whitespace around inline literal, target,
and substitution.
+ - improve backwards compatibility of patch [ 120 ]
+* docutils/parsers/rst/directives/tables.py
+
+ - improve backwards compatibility of patch [ 120 ] (tables accept
+ option widths: list of relative widths, 'auto' or 'grid').
+
* docutils/utils/__init__.py:
- Added ``split_escaped_whitespace`` function, support for escaped
Modified: trunk/docutils/docutils/parsers/rst/directives/tables.py
===================================================================
--- trunk/docutils/docutils/parsers/rst/directives/tables.py 2017-01-02 21:11:35 UTC (rev 8009)
+++ trunk/docutils/docutils/parsers/rst/directives/tables.py 2017-01-03 16:14:00 UTC (rev 8010)
@@ -112,13 +112,7 @@
'No table data detected in CSV file.', nodes.literal_block(
self.block_text, self.block_text), line=self.lineno)
raise SystemMessagePropagation(error)
- if self.widths == 'auto':
- widths = 'auto'
- elif self.widths: # "grid" or list of integers
- widths = 'given'
- else:
- widths = self.widths
- return widths, col_widths
+ return col_widths
def extend_short_rows_with_empty_cells(self, columns, parts):
for part in parts:
@@ -253,7 +247,7 @@
self.check_table_dimensions(rows, header_rows, stub_columns)
table_head.extend(rows[:header_rows])
table_body = rows[header_rows:]
- widths, col_widths = self.get_column_widths(max_cols)
+ col_widths = self.get_column_widths(max_cols)
self.extend_short_rows_with_empty_cells(max_cols,
(table_head, table_body))
except SystemMessagePropagation, detail:
@@ -269,7 +263,7 @@
return [error]
table = (col_widths, table_head, table_body)
table_node = self.state.build_table(table, self.content_offset,
- stub_columns, widths=widths)
+ stub_columns, widths=self.widths)
table_node['classes'] += self.options.get('class', [])
if 'align' in self.options:
table_node['align'] = self.options.get('align')
@@ -413,7 +407,7 @@
node = nodes.Element() # anonymous container for parsing
self.state.nested_parse(self.content, self.content_offset, node)
try:
- num_cols, widths, col_widths = self.check_list_content(node)
+ num_cols, col_widths = self.check_list_content(node)
table_data = [[item.children for item in row_list[0]]
for row_list in node[0]]
header_rows = self.options.get('header-rows', 0)
@@ -421,7 +415,7 @@
self.check_table_dimensions(table_data, header_rows, stub_columns)
except SystemMessagePropagation, detail:
return [detail.args[0]]
- table_node = self.build_table_from_list(table_data, widths, col_widths,
+ table_node = self.build_table_from_list(table_data, col_widths,
header_rows, stub_columns)
if 'align' in self.options:
table_node['align'] = self.options.get('align')
@@ -467,14 +461,15 @@
raise SystemMessagePropagation(error)
else:
num_cols = len(item[0])
- widths, col_widths = self.get_column_widths(num_cols)
- return num_cols, widths, col_widths
+ col_widths = self.get_column_widths(num_cols)
+ return num_cols, col_widths
- def build_table_from_list(self, table_data, widths, col_widths, header_rows,
- stub_columns):
+ def build_table_from_list(self, table_data, col_widths, header_rows, stub_columns):
table = nodes.table()
- if widths:
- table['classes'] += ['colwidths-%s' % widths]
+ if self.widths == 'auto':
+ table['classes'] += ['colwidths-auto']
+ elif self.widths: # "grid" or list of integers
+ table['classes'] += ['colwidths-given']
tgroup = nodes.tgroup(cols=len(col_widths))
table += tgroup
for col_width in col_widths:
Modified: trunk/docutils/docutils/parsers/rst/states.py
===================================================================
--- trunk/docutils/docutils/parsers/rst/states.py 2017-01-02 21:11:35 UTC (rev 8009)
+++ trunk/docutils/docutils/parsers/rst/states.py 2017-01-03 16:14:00 UTC (rev 8010)
@@ -1770,8 +1770,10 @@
def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
colwidths, headrows, bodyrows = tabledata
table = nodes.table()
- if widths:
- table['classes'] += ['colwidths-%s' % widths]
+ if widths == 'auto':
+ table['classes'] += ['colwidths-auto']
+ elif widths: # "grid" or list of integers
+ table['classes'] += ['colwidths-given']
tgroup = nodes.tgroup(cols=len(colwidths))
table += tgroup
for colwidth in colwidths:
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-01-03 23:08:21
|
Revision: 8012
http://sourceforge.net/p/docutils/code/8012
Author: milde
Date: 2017-01-03 23:08:19 +0000 (Tue, 03 Jan 2017)
Log Message:
-----------
Minor documentation update
Modified Paths:
--------------
trunk/docutils/README.txt
trunk/docutils/docs/dev/policies.txt
trunk/docutils/docs/user/tools.txt
trunk/docutils/docutils/parsers/rst/languages/sv.py
trunk/docutils/docutils/writers/html5_polyglot/__init__.py
Modified: trunk/docutils/README.txt
===================================================================
--- trunk/docutils/README.txt 2017-01-03 21:56:17 UTC (rev 8011)
+++ trunk/docutils/README.txt 2017-01-03 23:08:19 UTC (rev 8012)
@@ -103,7 +103,7 @@
To run the code, Python_ must be installed.
Docutils is compatible with Python versions from 2.4 up to 2.7 and
-versions 3.1 and 3.2 (cf. `Python 3 compatibility`_).
+versions 3.1 to 3.5 (cf. `Python 3 compatibility`_).
Docutils uses the following packages for enhanced functionality, if they are
installed:
@@ -138,9 +138,6 @@
* When editing the source, do changes on the Python 2 versions of the
files and re-run the build command.
-Using Docutils with Python 3.x is less tested and might still have some
-issues.
-
.. _porting to Python 3: http://docs.python.org/py3k/howto/pyporting.html
Modified: trunk/docutils/docs/dev/policies.txt
===================================================================
--- trunk/docutils/docs/dev/policies.txt 2017-01-03 21:56:17 UTC (rev 8011)
+++ trunk/docutils/docs/dev/policies.txt 2017-01-03 23:08:19 UTC (rev 8012)
@@ -86,9 +86,10 @@
cases are also examples and showcases for new features. See `Docutils
Testing`_ for a description of the test suite in ``docutils/test/``.
- Ensure the addition works with all supported Python versions
- (2.4 ... 3.4).
+ Ensure the addition works with all `supported Python versions`__.
+ __ ../../README.html#requirements
+
* Look at the Docutils sources to see how similar features are implemented,
learn to do it "the Docutils way".
Modified: trunk/docutils/docs/user/tools.txt
===================================================================
--- trunk/docutils/docs/user/tools.txt 2017-01-03 21:56:17 UTC (rev 8011)
+++ trunk/docutils/docs/user/tools.txt 2017-01-03 23:08:19 UTC (rev 8012)
@@ -121,7 +121,7 @@
:Parser: reStructuredText
:Writer: html4css1_
-The ``rst2html.py`` front end reads standalone reStructuredText source
+The ``rst2html4.py`` front end reads standalone reStructuredText source
files and produces `XHTML 1.0 Transitional`_ output.
A CSS stylesheet is required for proper rendering; a simple but
complete stylesheet is installed and used by default (see Stylesheets_
Modified: trunk/docutils/docutils/parsers/rst/languages/sv.py
===================================================================
--- trunk/docutils/docutils/parsers/rst/languages/sv.py 2017-01-03 21:56:17 UTC (rev 8011)
+++ trunk/docutils/docutils/parsers/rst/languages/sv.py 2017-01-03 23:08:19 UTC (rev 8012)
@@ -14,7 +14,6 @@
__docformat__ = 'reStructuredText'
-
directives = {
u'observera': 'attention',
u'akta': 'caution', # also 'försiktigt'
@@ -31,7 +30,7 @@
u'ämne': 'topic',
u'tema': 'topic',
u'rad-block': 'line-block',
- u'parsed-literal (translation required)': 'parsed-literal',
+ u'parsed-literal (translation required)': 'parsed-literal', # 'tolkad-bokstavlig'?
u'rubrik': 'rubric',
u'epigraf': 'epigraph',
u'höjdpunkter': 'highlights',
Modified: trunk/docutils/docutils/writers/html5_polyglot/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/html5_polyglot/__init__.py 2017-01-03 21:56:17 UTC (rev 8011)
+++ trunk/docutils/docutils/writers/html5_polyglot/__init__.py 2017-01-03 23:08:19 UTC (rev 8012)
@@ -1,9 +1,8 @@
# .. coding: utf8
+# $Id$
# :Author: Günter Milde <mi...@us...>
# Based on the html4css1 writer by David Goodger.
# :Maintainer: doc...@li...
-# :Revision: $Revision$
-# :Date: $Date: 2005-06-28$
# :Copyright: © 2005, 2009, 2015 Günter Milde,
# portions from html4css1 © David Goodger.
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-01-04 22:58:25
|
Revision: 8013
http://sourceforge.net/p/docutils/code/8013
Author: milde
Date: 2017-01-04 22:58:22 +0000 (Wed, 04 Jan 2017)
Log Message:
-----------
Clarify use of Unicode character categories.
Make generation of punctuation char samples py3k safe.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docs/ref/rst/restructuredtext.txt
trunk/docutils/docutils/utils/punctuation_chars.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2017-01-03 23:08:19 UTC (rev 8012)
+++ trunk/docutils/HISTORY.txt 2017-01-04 22:58:22 UTC (rev 8013)
@@ -20,18 +20,18 @@
* docs/ref/rst/restructuredtext.txt:
- Added documentation for escaped whitespace in URI contexts.
+ - Clarified use of Unicode character categories.
* docutils/parsers/rst/states.py:
- Added functionality: escaped whitespace in URI contexts.
- Recognize non-ASCII whitespace around inline literal, target,
and substitution.
- - improve backwards compatibility of patch [ 120 ]
* docutils/parsers/rst/directives/tables.py
- - improve backwards compatibility of patch [ 120 ] (tables accept
- option widths: list of relative widths, 'auto' or 'grid').
+ - Rework patch [ 120 ] (revert change to ``Table.get_column_widths()``
+ that led to problems in an application with a custom table directive).
* docutils/utils/__init__.py:
Modified: trunk/docutils/docs/ref/rst/restructuredtext.txt
===================================================================
--- trunk/docutils/docs/ref/rst/restructuredtext.txt 2017-01-03 23:08:19 UTC (rev 8012)
+++ trunk/docutils/docs/ref/rst/restructuredtext.txt 2017-01-04 22:58:22 UTC (rev 8013)
@@ -2442,11 +2442,11 @@
See `Escaping Mechanism`_ above for details.
5. If an inline markup start-string is immediately preceded by one of the
- ASCII characters ``' " < ( [ {`` or a similar Unicode character\ [#]_, it
- must not be followed by the corresponding closing character from ``' " )
- ] } >`` or a similar Unicode character\ [#]_.
- (For quotes, corresponding characters can be any of the `quotation marks
- in international usage`_.)
+ ASCII characters ``' " < ( [ {`` or a similar
+ Unicode character [#openers]_, it must not be followed by the
+ corresponding closing character from ``' " ) ] } >`` or a similar Unicode
+ character [#closers]_. (For quotes, matching characters can be any of
+ the `quotation marks in international usage`_.)
If the configuration setting `simple-inline-markup`_ is False (default),
additional conditions apply to the characters "around" the
@@ -2457,25 +2457,31 @@
* whitespace,
* one of the ASCII characters ``- : / ' " < ( [ {``
- * or a similar Unicode punctuation character.\ [#]_
+ * or a similar Unicode punctuation character. [#pre-chars]_
7. Inline markup end-strings must end a text block or be immediately
followed by
* whitespace,
* one of the ASCII characters ``- . , : ; ! ? \ / ' " ) ] } >``
- * or a similar Unicode punctuation character.\ [#]_
+ * or a similar Unicode punctuation character. [#post-chars]_
-.. [#] `Unicode categories`_ `Ps`, `Pi`, or `Pf`
-.. [#] Unicode categories `Pe`, `Pf`, or `Pi`
-.. [#] Unicode categories `Pd` (Dash), `Po` (Other), `Pi` (Initial quote),
- `Pf` (Final quote), or `Ps` (Open)
-.. [#] Unicode categories `Pd` (Dash), `Po` (Other), `Pi` (Initial quote),
- `Pf` (Final quote), or `Pe` (Close)
+.. [#openers] `Unicode categories`_ `Ps` (Open), `Pi` (Initial quote),
+ or `Pf` (Final quote). [#uni-version]_
+.. [#closers] Unicode categories `Pe` (Close), `Pi` (Initial quote),
+ or `Pf` (Final quote). [#uni-version]_
+.. [#pre-chars] Unicode categories `Ps` (Open), `Pi` (Initial quote),
+ `Pf` (Final quote), `Pd` (Dash), or `Po` (Other). [#uni-version]_
+.. [#post-chars] Unicode categories `Pe` (Close), `Pi` (Initial quote),
+ `Pf` (Final quote), `Pd` (Dash), or `Po` (Other). [#uni-version]_
+.. [#uni-version] The category of some characters changed with the
+ development of the Unicode standard.
+ Docutils 0.13 uses `Unicode version 5.2.0`_.
+
.. _Unicode categories:
http://www.unicode.org/Public/5.1.0/ucd/UCD.html#General_Category_Values
-
+.. _Unicode version 5.2.0: http://www.unicode.org/Public/5.2.0/
.. _quotation marks in international usage:
http://en.wikipedia.org/wiki/Quotation_mark,_non-English_usage
Modified: trunk/docutils/docutils/utils/punctuation_chars.py
===================================================================
--- trunk/docutils/docutils/utils/punctuation_chars.py 2017-01-03 23:08:19 UTC (rev 8012)
+++ trunk/docutils/docutils/utils/punctuation_chars.py 2017-01-04 22:58:22 UTC (rev 8013)
@@ -11,6 +11,8 @@
# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
# :Id: $Id$
+#
+# ::
import sys, re
import unicodedata
@@ -20,16 +22,31 @@
#
# This module provides the lists of characters for the implementation of
# the `inline markup recognition rules`_ in the reStructuredText parser
-# (states.py)
+# `<../parsers/rst/states.py>`__.
+# For efficiency and stability, the lists are pre-generated and stored in
+# module-level variables.
#
+# The category of some characters changed with the development of the Unicode
+# standard. The current lists are generated with the help of the "unicodedata"
+# module of Python 2.7 (based on the UnicodeData.txt file version 5.2.0).
+#
+# Running this file as a standalone module (``__main__``) checks the
+# definitions against a re-calculation. Updating the lists with a new
+# Unicode standard version is an API change (may render valid rST documents
+# invalid). It should only be done for "feature releases" and requires
+# also updating the specification of `inline markup recognition rules`_ in
+# ../../docs/ref/rst/restructuredtext.txt.
+#
# .. _inline markup recognition rules:
# ../../docs/ref/rst/restructuredtext.html#inline-markup
-
+#
+#
# Docutils punctuation category sample strings
# --------------------------------------------
#
# The sample strings are generated by punctuation_samples() and put here
-# literal to avoid the time-consuming generation with every Docutils run.
+# literal to avoid the time-consuming generation with every Docutils run
+# and prevent dependence on the Python version.
# As the samples are used inside ``[ ]`` in regular expressions, hyphen and
# square brackets are escaped. ::
@@ -83,12 +100,14 @@
# Matching open/close quotes
# --------------------------
-
+#
# Rule (5) requires determination of matching open/close pairs. However,
# the pairing of open/close quotes is ambiguous due to different typographic
-# conventions in different languages.
+# conventions in different languages. The following dictionary specifies
+# additional valid matches::
-quote_pairs = {u'\xbb': u'\xbb', # Swedish
+quote_pairs = {# open char: matching closing characters
+ u'\xbb': u'\xbb', # Swedish
u'\u2018': u'\u201a', # Greek
u'\u2019': u'\u2019', # Swedish
u'\u201a': u'\u2018\u2019', # German, Polish
@@ -99,11 +118,12 @@
}
def match_chars(c1, c2):
+ """Test whether `c1` and `c2` are a matching open/close character pair."""
try:
i = openers.index(c1)
except ValueError: # c1 not in openers
return False
- return c2 == closers[i] or c2 in quote_pairs.get(c1, '')
+ return c2 == closers[i] or c2 in quote_pairs.get(c1, u'')
# Running this file as a standalone module checks the definitions against a
@@ -114,6 +134,10 @@
# Unicode punctuation character categories
# ----------------------------------------
+#
+# For details about Unicode categories, see
+# http://www.unicode.org/Public/5.1.0/ucd/UCD.html#General_Category_Values
+# ::
unicode_punctuation_categories = {
# 'Pc': 'Connector', # not used in Docutils inline markup recognition
@@ -129,6 +153,8 @@
# generate character pattern strings
# ==================================
+#
+# ::
def unicode_charlists(categories, cp_min=0, cp_max=None):
"""Return dictionary of Unicode character lists.
@@ -157,6 +183,8 @@
# Character categories in Docutils
# --------------------------------
+#
+# ::
def punctuation_samples():
@@ -203,18 +231,18 @@
# allowed before markup if there is a matching closer
openers = [u'"\'(<\\[{']
- for cat in ('Ps', 'Pi', 'Pf'):
- openers.extend(ucharlists[cat])
+ for category in ('Ps', 'Pi', 'Pf'):
+ openers.extend(ucharlists[category])
# allowed after markup if there is a matching opener
closers = [u'"\')>\\]}']
- for cat in ('Pe', 'Pf', 'Pi'):
- closers.extend(ucharlists[cat])
+ for category in ('Pe', 'Pf', 'Pi'):
+ closers.extend(ucharlists[category])
# non-matching, allowed on both sides
delimiters = [u'\\-/:']
- for cat in ('Pd', 'Po'):
- delimiters.extend(ucharlists[cat])
+ for category in ('Pd', 'Po'):
+ delimiters.extend(ucharlists[category])
# non-matching, after markup
closing_delimiters = [r'\\.,;!?']
@@ -260,13 +288,13 @@
return ''.join(l2)
- def wrap_string(s, startstring= "(",
- endstring = ")", wrap=65):
+ def wrap_string(s, startstring= "(u'",
+ endstring = "')", wrap=65):
"""Line-wrap a unicode string literal definition."""
c = len(startstring)
- contstring = "'\n" + ' ' * len(startstring) + "u'"
+ contstring = "'\n" + ' ' * (len(startstring)-2) + "u'"
l = [startstring]
- for ch in s:
+ for ch in s.replace("'", r"\'"):
c += 1
if ch == '\\' and c > wrap:
c = len(startstring)
@@ -276,10 +304,25 @@
return ''.join(l)
+ def print_differences(old, new, name):
+ """List characters missing in old/new."""
+ if old != new:
+ print('new %s:' % name)
+ for c in new:
+ if c not in old:
+ print ' %04x'%ord(c), unicodedata.name(c)
+ print('removed %s:' % name)
+ for c in old:
+ if c not in new:
+ print ' %04x'%ord(c), unicodedata.name(c)
+
+
# print results
# =============
-
+#
# (re) create and compare the samples:
+#
+# ::
(o, c, d, cd) = punctuation_samples()
o, o_wide = separate_wide_chars(o)
@@ -287,40 +330,44 @@
d, d_wide = separate_wide_chars(d)
d = d[:5] + mark_intervals(d[5:])
d_wide = mark_intervals(d_wide)
- if sys.maxunicode >= 0x10FFFF: # "wide" build
- d += d_wide
- if o != openers:
- print '- openers = ur"""%s"""' % openers.encode('utf8')
- print '+ openers = ur"""%s"""' % o.encode('utf8')
+
+ print_differences(openers, o, 'openers')
if o_wide:
print '+ openers-wide = ur"""%s"""' % o_wide.encode('utf8')
- if c != closers:
- print '- closers = ur"""%s"""' % closers.encode('utf8')
- print '+ closers = ur"""%s"""' % c.encode('utf8')
+ print_differences(closers, c, 'closers')
if c_wide:
print '+ closers-wide = ur"""%s"""' % c_wide.encode('utf8')
- if d != delimiters:
- print '- delimiters = ur"%s"' % delimiters.encode('utf8')
- print '+ delimiters = ur"%s"' % d.encode('utf8')
- if cd != closing_delimiters:
- print '- closing_delimiters = ur"%s"' % closing_delimiters.encode('utf8')
- print '+ closing_delimiters = ur"%s"' % cd.encode('utf8')
- # closing_delimiters are all ASCII characters
+ print_differences(delimiters, d + d_wide, 'delimiters')
+ print_differences(closing_delimiters, cd, 'closing_delimiters')
+
# Print literal code to define the character sets:
+#
+# ::
+ print '# based on Unicode version', unicodedata.unidata_version
+
# `openers` and `closers` must be verbose and keep order because they are
# also used in `match_chars()`.
- print wrap_string(repr(o), startstring='openers = (')
- print wrap_string(repr(c), startstring='closers = (')
+ print wrap_string(o.encode('unicode-escape').decode(),
+ startstring="openers = (u'")
+ print wrap_string(c.encode('unicode-escape').decode(),
+ startstring="closers = (u'")
# delimiters: sort and use shortcut for intervals (saves ~150 characters):
- print wrap_string(repr(d), startstring='delimiters = (')
+ print wrap_string(d.encode('unicode-escape').decode(),
+ startstring="delimiters = (u'")
# add characters in the upper plane only in a "wide" build:
print 'if sys.maxunicode >= 0x10FFFF: # "wide" build'
- print wrap_string(repr(d_wide), startstring=' delimiters += (')
- print 'closing_delimiters =', repr(cd)
+ print wrap_string(d_wide.encode('unicode-escape').decode(),
+ startstring=" delimiters += (u'")
+ # additional closing delimiters:
+ print wrap_string(cd.encode('unicode-escape').decode(),
+ startstring="closing_delimiters = (u'")
+
# test prints
+#
+# ::
# print "wide" Unicode characters:
# ucharlists = unicode_charlists(unicode_punctuation_categories)
@@ -340,6 +387,8 @@
# print (u":%s: %s" % (cat, u''.join(chars))).encode('utf8')
# verbose print
+#
+# ::
# print 'openers:'
# for ch in openers:
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-01-05 09:49:29
|
Revision: 8014
http://sourceforge.net/p/docutils/code/8014
Author: milde
Date: 2017-01-05 09:49:26 +0000 (Thu, 05 Jan 2017)
Log Message:
-----------
"Outsourcing" of development code from utils.punctuation_chars.
Modified Paths:
--------------
trunk/docutils/docutils/utils/punctuation_chars.py
Added Paths:
-----------
trunk/docutils/tools/dev/generate_punctuation_chars.py
Modified: trunk/docutils/docutils/utils/punctuation_chars.py
===================================================================
--- trunk/docutils/docutils/utils/punctuation_chars.py 2017-01-04 22:58:22 UTC (rev 8013)
+++ trunk/docutils/docutils/utils/punctuation_chars.py 2017-01-05 09:49:26 UTC (rev 8014)
@@ -17,38 +17,37 @@
import sys, re
import unicodedata
-# punctuation characters around inline markup
-# ===========================================
+# Docutils character category patterns
+# ------------------------------------
#
-# This module provides the lists of characters for the implementation of
-# the `inline markup recognition rules`_ in the reStructuredText parser
-# `<../parsers/rst/states.py>`__.
-# For efficiency and stability, the lists are pre-generated and stored in
-# module-level variables.
+# This module provides patterns for the implementation of the
+# `inline markup recognition rules`_ in the reStructuredText parser
+# `<../parsers/rst/states.py>`__ based on Unicode character categories.
+# The patterns are used inside ``[ ]`` in regular expressions.
#
+# Rule (5) requires determination of matching open/close pairs. However,
+# the pairing of open/close quotes is ambigue due to different typographic
+# conventions in different languages. The ``quote_pairs`` function tests
+# whether two characters form an open/close pair.
+#
+# The patterns are generated by
+# ``docutils/tools/dev/generate_punctuation_chars.py`` to prevent dependance
+# on the Python version and avoid the time-consuming generation with every
+# Docutils run. See there for motives and implementation details.
+#
# The category of some characters changed with the development of the Unicode
# standard. The current lists are generated with the help of the "unicodedata"
# module of Python 2.7 (based on the UnicodeData.txt file version 5.2.0).
#
-# Running this file as a standalone module (``__main__``) checks the
-# definitions against a re-calculation. Updating the lists with a new
-# Unicode standard version is an API change (may render valid rST documents
-# invalid). It should only be done for "feature releases" and requires
-# also updating the specification of `inline markup recognition rules`_ in
-# ../../docs/ref/rst/restructuredtext.txt.
+# Updating the patterns with a new Unicode standard version is an API
+# change (may render valid rST documents invalid). It should only be done for
+# "feature releases" and requires also updating the specification of `inline
+# markup recognition rules`_ in ../../docs/ref/rst/restructuredtext.txt.
#
# .. _inline markup recognition rules:
# ../../docs/ref/rst/restructuredtext.html#inline-markup
#
-#
-# Docutils punctuation category sample strings
-# --------------------------------------------
-#
-# The sample strings are generated by punctuation_samples() and put here
-# literal to avoid the time-consuming generation with every Docutils run
-# and prevent dependance on the Python version.
-# As the samples are used inside ``[ ]`` in regular expressions, hyphen and
-# square brackets are escaped. ::
+# ::
openers = (u'"\'(<\\[{\u0f3a\u0f3c\u169b\u2045\u207d\u208d\u2329\u2768'
u'\u276a\u276c\u276e\u2770\u2772\u2774\u27c5\u27e6\u27e8\u27ea'
@@ -101,10 +100,8 @@
# Matching open/close quotes
# --------------------------
#
-# Rule (5) requires determination of matching open/close pairs. However,
-# the pairing of open/close quotes is ambigue due to different typographic
-# conventions in different languages. The following dictionary specifies
-# additional valid matches::
+# The pairing of open/close quotes is ambigue due to different typographic
+# conventions in different languages. Specify additional valid matches::
quote_pairs = {# open char: matching closing characters
u'\xbb': u'\xbb', # Swedish
@@ -124,281 +121,3 @@
except ValueError: # c1 not in openers
return False
return c2 == closers[i] or c2 in quote_pairs.get(c1, u'')
-
-
-# Running this file as a standalone module checks the definitions against a
-# re-calculation::
-
-if __name__ == '__main__':
-
-
-# Unicode punctuation character categories
-# ----------------------------------------
-#
-# For details about Unicode categories, see
-# http://www.unicode.org/Public/5.1.0/ucd/UCD.html#General_Category_Values
-# ::
-
- unicode_punctuation_categories = {
- # 'Pc': 'Connector', # not used in Docutils inline markup recognition
- 'Pd': 'Dash',
- 'Ps': 'Open',
- 'Pe': 'Close',
- 'Pi': 'Initial quote', # may behave like Ps or Pe depending on usage
- 'Pf': 'Final quote', # may behave like Ps or Pe depending on usage
- 'Po': 'Other'
- }
- """Unicode character categories for punctuation"""
-
-
-# generate character pattern strings
-# ==================================
-#
-# ::
-
- def unicode_charlists(categories, cp_min=0, cp_max=None):
- """Return dictionary of Unicode character lists.
-
- For each of the `catagories`, an item contains a list with all Unicode
- characters with `cp_min` <= code-point <= `cp_max` that belong to
- the category.
-
- The default values check every code-point supported by Python
- (`sys.maxint` is 0x10FFFF in a "wide" build and 0xFFFF in a "narrow"
- build, i.e. ucs4 and ucs2 respectively).
- """
- # Determine highest code point with one of the given categories
- # (may shorten the search time considerably if there are many
- # categories with not too high characters):
- if cp_max is None:
- cp_max = max(x for x in xrange(sys.maxunicode+1)
- if unicodedata.category(unichr(x)) in categories)
- # print cp_max # => 74867 for unicode_punctuation_categories
- charlists = {}
- for cat in categories:
- charlists[cat] = [unichr(x) for x in xrange(cp_min, cp_max+1)
- if unicodedata.category(unichr(x)) == cat]
- return charlists
-
-
-# Character categories in Docutils
-# --------------------------------
-#
-# ::
-
- def punctuation_samples():
-
- """Docutils punctuation category sample strings.
-
- Return list of sample strings for the categories "Open", "Close",
- "Delimiters" and "Closing-Delimiters" used in the `inline markup
- recognition rules`_.
- """
-
- # Lists with characters in Unicode punctuation character categories
- cp_min = 160 # ASCII chars have special rules for backwards compatibility
- ucharlists = unicode_charlists(unicode_punctuation_categories, cp_min)
-
- # match opening/closing characters
- # --------------------------------
- # Rearange the lists to ensure matching characters at the same
- # index position.
-
- # low quotation marks are also used as closers (e.g. in Greek)
- # move them to category Pi:
- ucharlists['Ps'].remove(u'‚') # 201A SINGLE LOW-9 QUOTATION MARK
- ucharlists['Ps'].remove(u'„') # 201E DOUBLE LOW-9 QUOTATION MARK
- ucharlists['Pi'] += [u'‚', u'„']
-
- ucharlists['Pi'].remove(u'‛') # 201B SINGLE HIGH-REVERSED-9 QUOTATION MARK
- ucharlists['Pi'].remove(u'‟') # 201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK
- ucharlists['Pf'] += [u'‛', u'‟']
-
- # 301F LOW DOUBLE PRIME QUOTATION MARK misses the opening pendant:
- ucharlists['Ps'].insert(ucharlists['Pe'].index(u'\u301f'), u'\u301d')
-
- # print u''.join(ucharlists['Ps']).encode('utf8')
- # print u''.join(ucharlists['Pe']).encode('utf8')
- # print u''.join(ucharlists['Pi']).encode('utf8')
- # print u''.join(ucharlists['Pf']).encode('utf8')
-
- # The Docutils character categories
- # ---------------------------------
- #
- # The categorization of ASCII chars is non-standard to reduce
- # both false positives and need for escaping. (see `inline markup
- # recognition rules`_)
-
- # allowed before markup if there is a matching closer
- openers = [u'"\'(<\\[{']
- for category in ('Ps', 'Pi', 'Pf'):
- openers.extend(ucharlists[category])
-
- # allowed after markup if there is a matching opener
- closers = [u'"\')>\\]}']
- for category in ('Pe', 'Pf', 'Pi'):
- closers.extend(ucharlists[category])
-
- # non-matching, allowed on both sides
- delimiters = [u'\\-/:']
- for category in ('Pd', 'Po'):
- delimiters.extend(ucharlists[category])
-
- # non-matching, after markup
- closing_delimiters = [r'\\.,;!?']
-
- # # Test open/close matching:
- # for i in range(min(len(openers),len(closers))):
- # print '%4d %s %s' % (i, openers[i].encode('utf8'),
- # closers[i].encode('utf8'))
-
- return [u''.join(chars) for chars in (openers, closers, delimiters,
- closing_delimiters)]
-
- def separate_wide_chars(s):
- """Return (s1,s2) with characters above 0xFFFF in s2"""
- maxunicode_narrow = 0xFFFF
- l1 = [ch for ch in s if ord(ch) <= maxunicode_narrow]
- l2 = [ch for ch in s if ord(ch) > maxunicode_narrow]
- return ''.join(l1), ''.join(l2)
-
- def mark_intervals(s):
- """Return s with shortcut notation for runs of consecutive characters
-
- Sort string and replace 'cdef' by 'c-f' and similar.
- """
- l =[]
- s = [ord(ch) for ch in s]
- s.sort()
- for n in s:
- try:
- if l[-1][-1]+1 == n:
- l[-1].append(n)
- else:
- l.append([n])
- except IndexError:
- l.append([n])
-
- l2 = []
- for i in l:
- i = [unichr(n) for n in i]
- if len(i) > 2:
- i = i[0], u'-', i[-1]
- l2.extend(i)
-
- return ''.join(l2)
-
- def wrap_string(s, startstring= "(u'",
- endstring = "')", wrap=65):
- """Line-wrap a unicode string literal definition."""
- c = len(startstring)
- contstring = "'\n" + ' ' * (len(startstring)-2) + "u'"
- l = [startstring]
- for ch in s.replace("'", r"\'"):
- c += 1
- if ch == '\\' and c > wrap:
- c = len(startstring)
- ch = contstring + ch
- l.append(ch)
- l.append(endstring)
- return ''.join(l)
-
-
- def print_differences(old, new, name):
- """List characters missing in old/new."""
- if old != new:
- print('new %s:' % name)
- for c in new:
- if c not in old:
- print ' %04x'%ord(c), unicodedata.name(c)
- print('removed %s:' % name)
- for c in old:
- if c not in new:
- print ' %04x'%ord(c), unicodedata.name(c)
-
-
-# print results
-# =============
-#
-# (re) create and compare the samples:
-#
-# ::
-
- (o, c, d, cd) = punctuation_samples()
- o, o_wide = separate_wide_chars(o)
- c, c_wide = separate_wide_chars(c)
- d, d_wide = separate_wide_chars(d)
- d = d[:5] + mark_intervals(d[5:])
- d_wide = mark_intervals(d_wide)
-
- print_differences(openers, o, 'openers')
- if o_wide:
- print '+ openers-wide = ur"""%s"""' % o_wide.encode('utf8')
- print_differences(closers, c, 'closers')
- if c_wide:
- print '+ closers-wide = ur"""%s"""' % c_wide.encode('utf8')
-
- print_differences(delimiters, d + d_wide, 'delimiters')
- print_differences(closing_delimiters, cd, 'closing_delimiters')
-
-# Print literal code to define the character sets:
-#
-# ::
-
- print '# based on Unicode version', unicodedata.unidata_version
-
- # `openers` and `closers` must be verbose and keep order because they are
- # also used in `match_chars()`.
- print wrap_string(o.encode('unicode-escape').decode(),
- startstring="openers = (u'")
- print wrap_string(c.encode('unicode-escape').decode(),
- startstring="closers = (u'")
- # delimiters: sort and use shortcut for intervals (saves ~150 characters):
- print wrap_string(d.encode('unicode-escape').decode(),
- startstring="delimiters = (u'")
- # add characters in the upper plane only in a "wide" build:
- print 'if sys.maxunicode >= 0x10FFFF: # "wide" build'
- print wrap_string(d_wide.encode('unicode-escape').decode(),
- startstring=" delimiters += (u'")
- # additional closing delimiters:
- print wrap_string(cd.encode('unicode-escape').decode(),
- startstring="closing_delimiters = (u'")
-
-
-# test prints
-#
-# ::
-
- # print "wide" Unicode characters:
- # ucharlists = unicode_charlists(unicode_punctuation_categories)
- # for key in ucharlists:
- # if key.endswith('wide'):
- # print key, ucharlists[key]
-
- # print 'openers = ', repr(openers)
- # print 'closers = ', repr(closers)
- # print 'delimiters = ', repr(delimiters)
- # print 'closing_delimiters = ', repr(closing_delimiters)
-
- # ucharlists = unicode_charlists(unicode_punctuation_categories)
- # for cat, chars in ucharlists.items():
- # # print cat, chars
- # # compact output (visible with a comprehensive font):
- # print (u":%s: %s" % (cat, u''.join(chars))).encode('utf8')
-
-# verbose print
-#
-# ::
-
- # print 'openers:'
- # for ch in openers:
- # print ch.encode('utf8'), unicodedata.name(ch)
- # print 'closers:'
- # for ch in closers:
- # print ch.encode('utf8'), unicodedata.name(ch)
- # print 'delimiters:'
- # for ch in delimiters:
- # print ch.encode('utf8'), unicodedata.name(ch)
- # print 'closing_delimiters:'
- # for ch in closing_delimiters:
- # print ch.encode('utf8'), unicodedata.name(ch)
Added: trunk/docutils/tools/dev/generate_punctuation_chars.py
===================================================================
--- trunk/docutils/tools/dev/generate_punctuation_chars.py (rev 0)
+++ trunk/docutils/tools/dev/generate_punctuation_chars.py 2017-01-05 09:49:26 UTC (rev 8014)
@@ -0,0 +1,334 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# :Copyright: © 2011, 2016 Günter Milde.
+# :License: Released under the terms of the `2-Clause BSD license`_, in short:
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+# This file is offered as-is, without any warranty.
+#
+# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
+
+# :Id: $Id$
+#
+# ::
+
+import sys, re
+import unicodedata
+
+# import the punctuation_chars module from the source or Py3k build
+# path for local Python modules
+if sys.version_info < (3,):
+ sys.path.insert(0, '../../docutils')
+else:
+ sys.path.insert(0, '../../build/lib')
+ unichr = chr
+
+from docutils.utils.punctuation_chars import (openers, closers, delimiters,
+ closing_delimiters)
+
+# (re)generate the utils.punctuation_chars module
+# ===============================================
+#
+# The category of some characters may change with the development of the
+# Unicode standard. This tool checks the patterns in `utils.punctuation_chars`
+# against a re-calculation based on the "unicodedata" stdlib module
+# which may give different results for different Python versions.
+#
+# Updating the patterns with a new (Python|Unicode standard) version is an API
+# change (may render valid rST documents invalid). It should only be done for
+# "feature releases" and requires also updating the specification of `inline
+# markup recognition rules`_ in ../../docs/ref/rst/restructuredtext.txt.
+#
+# Generation of the character category patterns
+# ----------------------------------------------
+#
+#
+# Unicode punctuation character categories
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# For details about Unicode categories, see
+# http://www.unicode.org/Public/5.1.0/ucd/UCD.html#General_Category_Values
+# ::
+
+unicode_punctuation_categories = {
+ # 'Pc': 'Connector', # not used in Docutils inline markup recognition
+ 'Pd': 'Dash',
+ 'Ps': 'Open',
+ 'Pe': 'Close',
+ 'Pi': 'Initial quote', # may behave like Ps or Pe depending on usage
+ 'Pf': 'Final quote', # may behave like Ps or Pe depending on usage
+ 'Po': 'Other'
+ }
+"""Unicode character categories for punctuation"""
+
+
+# generate character pattern strings
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# ::
+
+def unicode_charlists(categories, cp_min=0, cp_max=None):
+ """Return dictionary of Unicode character lists.
+
+ For each of the `categories`, an item contains a list with all Unicode
+ characters with `cp_min` <= code-point <= `cp_max` that belong to
+ the category.
+
+ The default values check every code-point supported by Python
+ (`sys.maxunicode` is 0x10FFFF in a "wide" build and 0xFFFF in a "narrow"
+ build, i.e. ucs4 and ucs2 respectively).
+ """
+ # Determine highest code point with one of the given categories
+ # (may shorten the search time considerably if there are many
+ # categories with not too high characters):
+ if cp_max is None:
+ cp_max = max(x for x in range(sys.maxunicode+1)
+ if unicodedata.category(unichr(x)) in categories)
+ # print(cp_max) # => 74867 for unicode_punctuation_categories
+ charlists = {}
+ for cat in categories:
+ charlists[cat] = [unichr(x) for x in range(cp_min, cp_max+1)
+ if unicodedata.category(unichr(x)) == cat]
+ return charlists
+
+
+# Character categories in Docutils
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# ::
+
+def character_category_patterns():
+
+ """Docutils character category patterns.
+
+ Return list of pattern strings for the categories "Open", "Close",
+ "Delimiters" and "Closing-Delimiters" used in the `inline markup
+ recognition rules`_.
+ """
+
+ cp_min = 160 # ASCII chars have special rules for backwards compatibility
+ ucharlists = unicode_charlists(unicode_punctuation_categories, cp_min)
+ """Strings of characters in Unicode punctuation character categories"""
+
+ # match opening/closing characters
+ # --------------------------------
+ # Rearrange the lists to ensure matching characters at the same
+ # index position.
+
+ # low quotation marks are also used as closers (e.g. in Greek)
+ # move them to category Pi:
+ ucharlists['Ps'].remove(u'‚') # 201A SINGLE LOW-9 QUOTATION MARK
+ ucharlists['Ps'].remove(u'„') # 201E DOUBLE LOW-9 QUOTATION MARK
+ ucharlists['Pi'] += [u'‚', u'„']
+
+ ucharlists['Pi'].remove(u'‛') # 201B SINGLE HIGH-REVERSED-9 QUOTATION MARK
+ ucharlists['Pi'].remove(u'‟') # 201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK
+ ucharlists['Pf'] += [u'‛', u'‟']
+
+ # 301F LOW DOUBLE PRIME QUOTATION MARK has no opening counterpart:
+ ucharlists['Ps'].insert(ucharlists['Pe'].index(u'\u301f'), u'\u301d')
+
+ # print(u''.join(ucharlists['Ps']).encode('utf8'))
+ # print(u''.join(ucharlists['Pe']).encode('utf8'))
+ # print(u''.join(ucharlists['Pi']).encode('utf8'))
+ # print(u''.join(ucharlists['Pf']).encode('utf8'))
+
+ # The Docutils character categories
+ # ---------------------------------
+ #
+ # The categorization of ASCII chars is non-standard to reduce
+ # both false positives and need for escaping. (see `inline markup
+ # recognition rules`_)
+
+ # allowed before markup if there is a matching closer
+ openers = [u'"\'(<\\[{']
+ for category in ('Ps', 'Pi', 'Pf'):
+ openers.extend(ucharlists[category])
+
+ # allowed after markup if there is a matching opener
+ closers = [u'"\')>\\]}']
+ for category in ('Pe', 'Pf', 'Pi'):
+ closers.extend(ucharlists[category])
+
+ # non-matching, allowed on both sides
+ delimiters = [u'\\-/:']
+ for category in ('Pd', 'Po'):
+ delimiters.extend(ucharlists[category])
+
+ # non-matching, after markup
+ closing_delimiters = [r'\\.,;!?']
+
+ # # Test open/close matching:
+ # for i in range(min(len(openers),len(closers))):
+ # print('%4d %s %s' % (i, openers[i].encode('utf8'),
+ # closers[i].encode('utf8')))
+
+ return [u''.join(chars) for chars in (openers, closers, delimiters,
+ closing_delimiters)]
+
+def separate_wide_chars(s):
+ """Return (s1,s2) with characters above 0xFFFF in s2"""
+ maxunicode_narrow = 0xFFFF
+ l1 = [ch for ch in s if ord(ch) <= maxunicode_narrow]
+ l2 = [ch for ch in s if ord(ch) > maxunicode_narrow]
+ return ''.join(l1), ''.join(l2)
+
+def mark_intervals(s):
+ """Return s with shortcut notation for runs of consecutive characters
+
+ Sort string and replace 'cdef' by 'c-f' and similar.
+ """
+ l =[]
+ s = [ord(ch) for ch in s]
+ s.sort()
+ for n in s:
+ try:
+ if l[-1][-1]+1 == n:
+ l[-1].append(n)
+ else:
+ l.append([n])
+ except IndexError:
+ l.append([n])
+
+ l2 = []
+ for i in l:
+ i = [unichr(n) for n in i]
+ if len(i) > 2:
+ i = i[0], u'-', i[-1]
+ l2.extend(i)
+
+ return ''.join(l2)
+
+def wrap_string(s, startstring= "(u'",
+ endstring = "')", wrap=65):
+ """Line-wrap a unicode string literal definition."""
+ c = len(startstring)
+ contstring = "'\n" + ' ' * (len(startstring)-2) + "u'"
+ l = [startstring]
+ for ch in s.replace("'", r"\'"):
+ c += 1
+ if ch == '\\' and c > wrap:
+ c = len(startstring)
+ ch = contstring + ch
+ l.append(ch)
+ l.append(endstring)
+ return ''.join(l)
+
+
+def print_differences(old, new, name):
+ """List characters missing in old/new."""
+ if old != new:
+ print('new %s:' % name)
+ for c in new:
+ if c not in old:
+ print(' %04x'%ord(c), unicodedata.name(c))
+ print('removed %s:' % name)
+ for c in old:
+ if c not in new:
+ print(' %04x'%ord(c), unicodedata.name(c))
+
+
+# Output
+# ------
+#
+# ::
+
+if __name__ == '__main__':
+
+# (Re)create and compare character patterns
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# ::
+
+ (o, c, d, cd) = character_category_patterns()
+ o, o_wide = separate_wide_chars(o)
+ c, c_wide = separate_wide_chars(c)
+ d, d_wide = separate_wide_chars(d)
+ d = d[:5] + mark_intervals(d[5:])
+ d_wide = mark_intervals(d_wide)
+
+ print_differences(openers, o, 'openers')
+ if o_wide:
+ print('+ openers-wide = ur"""%s"""' % o_wide.encode('utf8'))
+ print_differences(closers, c, 'closers')
+ if c_wide:
+ print('+ closers-wide = ur"""%s"""' % c_wide.encode('utf8'))
+
+ print_differences(delimiters, d + d_wide, 'delimiters')
+ print_differences(closing_delimiters, cd, 'closing_delimiters')
+
+# Print literal code to define the character sets
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# This code can be copied to punctuation_chars.py if an update is wanted.
+
+# Unicode version::
+
+ print('# based on Unicode version %s' % unicodedata.unidata_version)
+
+# `openers` and `closers` must be verbose and keep order because they are
+# also used in `match_chars()`::
+
+ print(wrap_string(o.encode('unicode-escape').decode(),
+ startstring="openers = (u'"))
+ print(wrap_string(c.encode('unicode-escape').decode(),
+ startstring="closers = (u'"))
+
+# delimiters: sort and use shortcut for intervals (saves ~150 characters)::
+
+ print(wrap_string(d.encode('unicode-escape').decode(),
+ startstring="delimiters = (u'"))
+
+# add characters in the upper plane only in a "wide" build::
+
+ print('if sys.maxunicode >= 0x10FFFF: # "wide" build')
+ print(wrap_string(d_wide.encode('unicode-escape').decode(),
+ startstring=" delimiters += (u'"))
+
+# additional closing delimiters::
+
+ print(wrap_string(cd.encode('unicode-escape').decode(),
+ startstring="closing_delimiters = (u'"))
+
+
+# test prints
+# ~~~~~~~~~~~
+#
+# For interactive use in development you may uncomment the following
+# definitions::
+
+ # print "wide" Unicode characters:
+ # ucharlists = unicode_charlists(unicode_punctuation_categories)
+ # for key in ucharlists:
+ # if key.endswith('wide'):
+ # print key, ucharlists[key]
+
+ # print 'openers = ', repr(openers)
+ # print 'closers = ', repr(closers)
+ # print 'delimiters = ', repr(delimiters)
+ # print 'closing_delimiters = ', repr(closing_delimiters)
+
+ # ucharlists = unicode_charlists(unicode_punctuation_categories)
+ # for cat, chars in ucharlists.items():
+ # # print cat, chars
+ # # compact output (visible with a comprehensive font):
+ # print (u":%s: %s" % (cat, u''.join(chars))).encode('utf8')
+
+# verbose print
+#
+# ::
+
+ # print 'openers:'
+ # for ch in openers:
+ # print ch.encode('utf8'), unicodedata.name(ch)
+ # print 'closers:'
+ # for ch in closers:
+ # print ch.encode('utf8'), unicodedata.name(ch)
+ # print 'delimiters:'
+ # for ch in delimiters:
+ # print ch.encode('utf8'), unicodedata.name(ch)
+ # print 'closing_delimiters:'
+ # for ch in closing_delimiters:
+ # print ch.encode('utf8'), unicodedata.name(ch)
Property changes on: trunk/docutils/tools/dev/generate_punctuation_chars.py
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-01-17 15:06:20
|
Revision: 8016
http://sourceforge.net/p/docutils/code/8016
Author: milde
Date: 2017-01-17 15:06:17 +0000 (Tue, 17 Jan 2017)
Log Message:
-----------
Generate the complete punctuation_chars module with the corresponding tool.
Modified Paths:
--------------
trunk/docutils/docutils/utils/punctuation_chars.py
trunk/docutils/tools/dev/generate_punctuation_chars.py
Modified: trunk/docutils/docutils/utils/punctuation_chars.py
===================================================================
--- trunk/docutils/docutils/utils/punctuation_chars.py 2017-01-08 09:54:35 UTC (rev 8015)
+++ trunk/docutils/docutils/utils/punctuation_chars.py 2017-01-17 15:06:17 UTC (rev 8016)
@@ -1,6 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# :Copyright: © 2011 Günter Milde.
+# :Id: $Id$
+# :Copyright: © 2011, 2017 Günter Milde.
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
# Copying and distribution of this file, with or without modification,
@@ -9,46 +10,39 @@
# This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
-
-# :Id: $Id$
#
+# This file is generated by
+# ``docutils/tools/dev/generate_punctuation_chars.py``.
# ::
import sys, re
import unicodedata
-# Docutils character category patterns
-# ------------------------------------
-#
-# This module provides patterns for the implementation of the
-# `inline markup recognition rules`_ in the reStructuredText parser
-# `<../parsers/rst/states.py>`__ based on Unicode character categories.
-# The patterns are used inside ``[ ]`` in regular expressions.
-#
-# Rule (5) requires determination of matching open/close pairs. However,
-# the pairing of open/close quotes is ambigue due to different typographic
-# conventions in different languages. The ``quote_pairs`` function tests
-# whether two characters form an open/close pair.
-#
-# The patterns are generated by
-# ``docutils/tools/dev/generate_punctuation_chars.py`` to prevent dependance
-# on the Python version and avoid the time-consuming generation with every
-# Docutils run. See there for motives and implementation details.
-#
-# The category of some characters changed with the development of the Unicode
-# standard. The current lists are generated with the help of the "unicodedata"
-# module of Python 2.7 (based on the UnicodeData.txt file version 5.2.0).
-#
-# Updating the patterns with a new Unicode standard version is an API
-# change (may render valid rST documents invalid). It should only be done for
-# "feature releases" and requires also updating the specification of `inline
-# markup recognition rules`_ in ../../docs/ref/rst/restructuredtext.txt.
-#
-# .. _inline markup recognition rules:
-# ../../docs/ref/rst/restructuredtext.html#inline-markup
-#
-# ::
+"""Docutils character category patterns.
+ Patterns for the implementation of the `inline markup recognition rules`_
+ in the reStructuredText parser `docutils.parsers.rst.states.py` based
+ on Unicode character categories.
+ The patterns are used inside ``[ ]`` in regular expressions.
+
+ Rule (5) requires determination of matching open/close pairs. However, the
+ pairing of open/close quotes is ambiguous due to different typographic
+ conventions in different languages. The ``match_chars`` function tests
+ whether two characters form an open/close pair.
+
+ The patterns are generated by
+ ``docutils/tools/dev/generate_punctuation_chars.py`` to prevent dependence
+ on the Python version and avoid the time-consuming generation with every
+ Docutils run. See there for motives and implementation details.
+
+ The category of some characters changed with the development of the
+ Unicode standard. The current lists are generated with the help of the
+ "unicodedata" module of Python 2.7.13 (based on Unicode version 5.2.0).
+
+ .. _inline markup recognition rules:
+ http://docutils.sf.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
+"""
+
openers = (u'"\'(<\\[{\u0f3a\u0f3c\u169b\u2045\u207d\u208d\u2329\u2768'
u'\u276a\u276c\u276e\u2770\u2772\u2774\u27c5\u27e6\u27e8\u27ea'
u'\u27ec\u27ee\u2983\u2985\u2987\u2989\u298b\u298d\u298f\u2991'
@@ -99,23 +93,28 @@
# Matching open/close quotes
# --------------------------
-#
-# The pairing of open/close quotes is ambigue due to different typographic
-# conventions in different languages. Specify additional valid matches::
-quote_pairs = {# open char: matching closing characters
- u'\xbb': u'\xbb', # Swedish
- u'\u2018': u'\u201a', # Greek
- u'\u2019': u'\u2019', # Swedish
- u'\u201a': u'\u2018\u2019', # German, Polish
- u'\u201c': u'\u201e', # German
- u'\u201e': u'\u201c\u201d',
- u'\u201d': u'\u201d', # Swedish
- u'\u203a': u'\u203a', # Swedish
- }
+quote_pairs = {# open char: matching closing characters # usage example
+ u'\xbb': u'\xbb', # » » Swedish
+ u'\u2018': u'\u201a', # ‘ ‚ Albanian/Greek/Turkish
+ u'\u2019': u'\u2019', # ’ ’ Swedish
+ u'\u201a': u'\u2018\u2019', # ‚ ‘ German ‚ ’ Polish
+ u'\u201c': u'\u201e', # “ „ Albanian/Greek/Turkish
+ u'\u201e': u'\u201c\u201d', # „ “ German „ ” Polish
+ u'\u201d': u'\u201d', # ” ” Swedish
+ u'\u203a': u'\u203a', # › › Swedish
+ }
+"""Additional open/close quote pairs."""
def match_chars(c1, c2):
- """Test whether `c1` and `c2` are a matching open/close character pair."""
+ """Test whether `c1` and `c2` are a matching open/close character pair.
+
+ Matching open/close pairs are at the same position in
+ `punctuation_chars.openers` and `punctuation_chars.closers`.
+ The pairing of open/close quotes is ambiguous due to different
+ typographic conventions in different languages,
+ so we test for additional matches stored in `quote_pairs`.
+ """
try:
i = openers.index(c1)
except ValueError: # c1 not in openers
Modified: trunk/docutils/tools/dev/generate_punctuation_chars.py
===================================================================
--- trunk/docutils/tools/dev/generate_punctuation_chars.py 2017-01-08 09:54:35 UTC (rev 8015)
+++ trunk/docutils/tools/dev/generate_punctuation_chars.py 2017-01-17 15:06:17 UTC (rev 8016)
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# :Copyright: © 2011, 2016 Günter Milde.
+# :Copyright: © 2011, 2017 Günter Milde.
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
# Copying and distribution of this file, with or without modification,
@@ -14,37 +14,131 @@
#
# ::
-import sys, re
-import unicodedata
+"""(Re)generate the utils.punctuation_chars module."""
-# import the punctuation_chars module from the source or Py3k build
-# path for local Python modules
-if sys.version_info < (3,):
- sys.path.insert(0, '../../docutils')
-else:
- sys.path.insert(0, '../../build/lib')
- unichr = chr
-
-from docutils.utils.punctuation_chars import (openers, closers, delimiters,
- closing_delimiters)
-
# (re)generate the utils.punctuation_chars module
# ===============================================
#
-# The category of some characters may change with the development of the
+# The category of some characters can change with the development of the
# Unicode standard. This tool checks the patterns in `utils.punctuation_chars`
# against a re-calculation based on the "unicodedata" stdlib module
# which may give different results for different Python versions.
#
-# Updating the patterns with a new (Python|Unicode standard) version is an API
-# change (may render valid rST documents invalid). It should only be done for
-# "feature releases" and requires also updating the specification of `inline
-# markup recognition rules`_ in ../../docs/ref/rst/restructuredtext.txt.
+# Updating the module with changed `unicode_punctuation_categories` (due to
+# a new Python or Unicode standard version) is an API change (may render valid
+# rST documents invalid). It should only be done for "feature releases" and
+# requires also updating the specification of `inline markup recognition
+# rules`_ in ../../docs/ref/rst/restructuredtext.txt.
#
+# .. _inline markup recognition rules:
+# ../../docs/ref/rst/restructuredtext.html#inline-markup
+
+
+# Setup::
+
+import sys, re
+import unicodedata
+
+if sys.version_info >= (3,):
+ unichr = chr # unichr not available in Py3k
+else:
+ import codecs
+ sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
+
+
+# Template for utils.punctuation_chars
+# ------------------------------------
+#
+# Problem: ``ur`` prefix fails with Py 3.5 ::
+
+module_template = u'''#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# :Id: $Id$
+# :Copyright: © 2011, 2017 Günter Milde.
+# :License: Released under the terms of the `2-Clause BSD license`_, in short:
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+# This file is offered as-is, without any warranty.
+#
+# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
+#
+# This file is generated by
+# ``docutils/tools/dev/generate_punctuation_chars.py``.
+# ::
+
+import sys, re
+import unicodedata
+
+"""Docutils character category patterns.
+
+ Patterns for the implementation of the `inline markup recognition rules`_
+ in the reStructuredText parser `docutils.parsers.rst.states.py` based
+ on Unicode character categories.
+ The patterns are used inside ``[ ]`` in regular expressions.
+
+ Rule (5) requires determination of matching open/close pairs. However, the
+ pairing of open/close quotes is ambiguous due to different typographic
+ conventions in different languages. The ``match_chars`` function tests
+ whether two characters form an open/close pair.
+
+ The patterns are generated by
+ ``docutils/tools/dev/generate_punctuation_chars.py`` to prevent dependence
+ on the Python version and avoid the time-consuming generation with every
+ Docutils run. See there for motives and implementation details.
+
+ The category of some characters changed with the development of the
+ Unicode standard. The current lists are generated with the help of the
+ "unicodedata" module of Python %(python_version)s (based on Unicode version %(unidata_version)s).
+
+ .. _inline markup recognition rules:
+ http://docutils.sf.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
+"""
+
+%(openers)s
+%(closers)s
+%(delimiters)s
+if sys.maxunicode >= 0x10FFFF: # "wide" build
+%(delimiters_wide)s
+closing_delimiters = u'\\\\\\\\.,;!?'
+
+
+# Matching open/close quotes
+# --------------------------
+
+quote_pairs = {# open char: matching closing characters # usage example
+ u'\\xbb': u'\\xbb', # » » Swedish
+ u'\\u2018': u'\\u201a', # ‘ ‚ Albanian/Greek/Turkish
+ u'\\u2019': u'\\u2019', # ’ ’ Swedish
+ u'\\u201a': u'\\u2018\\u2019', # ‚ ‘ German ‚ ’ Polish
+ u'\\u201c': u'\\u201e', # “ „ Albanian/Greek/Turkish
+ u'\\u201e': u'\\u201c\\u201d', # „ “ German „ ” Polish
+ u'\\u201d': u'\\u201d', # ” ” Swedish
+ u'\\u203a': u'\\u203a', # › › Swedish
+ }
+"""Additional open/close quote pairs."""
+
+def match_chars(c1, c2):
+ """Test whether `c1` and `c2` are a matching open/close character pair.
+
+ Matching open/close pairs are at the same position in
+ `punctuation_chars.openers` and `punctuation_chars.closers`.
+ The pairing of open/close quotes is ambiguous due to different
+ typographic conventions in different languages,
+ so we test for additional matches stored in `quote_pairs`.
+ """
+ try:
+ i = openers.index(c1)
+ except ValueError: # c1 not in openers
+ return False
+ return c2 == closers[i] or c2 in quote_pairs.get(c1, u'')\
+'''
+
+
# Generation of the character category patterns
# ----------------------------------------------
#
-#
# Unicode punctuation character categories
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
@@ -160,11 +254,6 @@
# non-matching, after markup
closing_delimiters = [r'\\.,;!?']
- # # Test open/close matching:
- # for i in range(min(len(openers),len(closers))):
- # print('%4d %s %s' % (i, openers[i].encode('utf8'),
- # closers[i].encode('utf8'))
-
return [u''.join(chars) for chars in (openers, closers, delimiters,
closing_delimiters)]
@@ -202,7 +291,7 @@
return ''.join(l2)
def wrap_string(s, startstring= "(u'",
- endstring = "')", wrap=65):
+ endstring = "')", wrap=67):
"""Line-wrap a unicode string literal definition."""
c = len(startstring)
contstring = "'\n" + ' ' * (len(startstring)-2) + "u'"
@@ -228,8 +317,20 @@
for c in old:
if c not in new:
print(' %04x'%ord(c), unicodedata.name(c))
+ else:
+ print('%s unchanged' % name)
+def print_quote_pairs():
+ pairs = [(o,c) for o,c in quote_pairs.items()]
+ for o,c in sorted(pairs):
+ print((u'%s %s' % (o,c)).encode('utf8'))
+ # # Test open/close matching:
+ # for i in range(min(len(openers),len(closers))):
+ # print('%4d %s %s' % (i, openers[i].encode('utf8'),
+ # closers[i].encode('utf8'))
+
+
# Output
# ------
#
@@ -237,62 +338,91 @@
if __name__ == '__main__':
-# (Re)create and compare character patterns
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ import argparse
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument('-t', '--test', action="store_true",
+ help='test for changed character categories')
+ args = parser.parse_args()
+
+# (Re)create character patterns
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# ::
(o, c, d, cd) = character_category_patterns()
+
+# Characters in the upper plane require a "wide" build::
+
o, o_wide = separate_wide_chars(o)
c, c_wide = separate_wide_chars(c)
d, d_wide = separate_wide_chars(d)
+
+# delimiters: sort and use shortcut for intervals (saves ~150 characters)
+# (`openers` and `closers` must be verbose and keep order
+# because they are also used in `match_chars()`)::
+
d = d[:5] + mark_intervals(d[5:])
d_wide = mark_intervals(d_wide)
- print_differences(openers, o, 'openers')
- if o_wide:
- print('+ openers-wide = ur"""%s"""' % o_wide.encode('utf8'))
- print_differences(closers, c, 'closers')
- if c_wide:
- print('+ closers-wide = ur"""%s"""' % c_wide.encode('utf8'))
- print_differences(delimiters, d + d_wide, 'delimiters')
- print_differences(closing_delimiters, cd, 'closing_delimiters')
+# Test: compare module content with re-generated definitions
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# ::
-# Print literal code to define the character sets
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#
-# This code can be copied to punctuation_chars.py if an update is wanted.
+ if args.test:
-# Unicode version::
+# Import the punctuation_chars module from the source
+# or Py3k build path for local Python modules::
- print('# based on Unicode version %s' % unicodedata.unidata_version)
+ if sys.version_info < (3,):
+ sys.path.insert(0, '../../docutils')
+ else:
+ sys.path.insert(0, '../../build/lib')
-# `openers` and `closers` must be verbose and keep order because they are
-# also used in `match_chars()`::
+ from docutils.utils.punctuation_chars import (openers, closers,
+ delimiters, closing_delimiters)
- print(wrap_string(o.encode('unicode-escape').decode(),
- startstring="openers = (u'"))
- print(wrap_string(c.encode('unicode-escape').decode(),
- startstring="closers = (u'"))
+ print('Check for differences between the current `punctuation_chars`'
+ ' module\n and a regeneration based on Unicode version %s:'
+ % unicodedata.unidata_version)
-# delimiters: sort and use shortcut for intervals (saves ~150 characters)::
+ print_differences(openers, o, 'openers')
+ if o_wide:
+ print('+ openers-wide = ur"""%s"""' % o_wide.encode('utf8'))
+ print_differences(closers, c, 'closers')
+ if c_wide:
+ print('+ closers-wide = ur"""%s"""' % c_wide.encode('utf8'))
- print(wrap_string(d.encode('unicode-escape').decode(),
- startstring="delimiters = (u'"))
+ print_differences(delimiters, d + d_wide, 'delimiters')
+ print_differences(closing_delimiters, cd, 'closing_delimiters')
-# add characters in the upper plane only in a "wide" build::
+ sys.exit()
- print('if sys.maxunicode >= 0x10FFFF: # "wide" build')
- print(wrap_string(d_wide.encode('unicode-escape').decode(),
- startstring=" delimiters += (u'"))
+# Print re-generation of the punctuation_chars module
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The output can be copied to docutils/utils if an update is wanted
+# (API change, see Intro).
-# additional closing delimiters::
+# Replacements::
- print(wrap_string(cd.encode('unicode-escape').decode(),
- startstring="closing_delimiters = (u'"))
+ substitutions = {
+ 'python_version': '.'.join(str(s) for s in sys.version_info[:3]),
+ 'unidata_version': unicodedata.unidata_version,
+ 'openers': wrap_string(o.encode('unicode-escape').decode(),
+ startstring="openers = (u'"),
+ 'closers': wrap_string(c.encode('unicode-escape').decode(),
+ startstring="closers = (u'"),
+ 'delimiters': wrap_string(d.encode('unicode-escape').decode(),
+ startstring="delimiters = (u'"),
+ 'delimiters_wide': wrap_string(
+ d_wide.encode('unicode-escape').decode(),
+ startstring=" delimiters += (u'")
+ }
+ print(module_template % substitutions)
+
# test prints
# ~~~~~~~~~~~
#
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-01-17 20:02:52
|
Revision: 8017
http://sourceforge.net/p/docutils/code/8017
Author: milde
Date: 2017-01-17 20:02:50 +0000 (Tue, 17 Jan 2017)
Log Message:
-----------
Update and add smartquote definitions for some languages.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/utils/smartquotes.py
trunk/docutils/test/test_transforms/test_smartquotes.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2017-01-17 15:06:17 UTC (rev 8016)
+++ trunk/docutils/HISTORY.txt 2017-01-17 20:02:50 UTC (rev 8017)
@@ -20,7 +20,7 @@
* docs/ref/rst/restructuredtext.txt:
- Added documentation for escaped whitespace in URI contexts.
- - Clarified use of Unicode character categories.
+ - Clarify use of Unicode character categories.
* docutils/parsers/rst/states.py:
@@ -28,7 +28,7 @@
- Recognize non-ASCII whitespace around inline literal, target,
and substitution.
-* docutils/parsers/rst/directives/tables.py
+* docutils/parsers/rst/directives/tables.py:
- Rework patch [ 120 ] (revert change to ``Table.get_column_widths()``
that led to problems in an application with a custom table directive).
@@ -38,11 +38,16 @@
- Added ``split_escaped_whitespace`` function, support for escaped
whitespace in URI contexts.
-* tools/
+* docutils/utils/smartquotes.py:
- - New front-end ``rst2html4.py``.
+ - Update quote definitions for languages et, fi, ro, sv, tr, uk.
+ - New quote definitions for hr, hsb, hu, lv, sl.
+
+* tools/rst2html4.py: New front-end.
+
+* tools/dev/generate_punctuation_chars.py: New script
+ to test and update utils.punctuation_chars.
-
Release 0.13.1 (2016-12-09)
===========================
Modified: trunk/docutils/docutils/utils/smartquotes.py
===================================================================
--- trunk/docutils/docutils/utils/smartquotes.py 2017-01-17 15:06:17 UTC (rev 8016)
+++ trunk/docutils/docutils/utils/smartquotes.py 2017-01-17 20:02:50 UTC (rev 8017)
@@ -384,6 +384,7 @@
# http://de.wikipedia.org/wiki/Anf%C3%BChrungszeichen#Andere_Sprachen
# https://fr.wikipedia.org/wiki/Guillemet
# https://en.wikipedia.org/wiki/Hebrew_punctuation#Quotation_marks
+ # http://www.tustep.uni-tuebingen.de/bi/bi00/bi001t1-anfuehrung.pdf
quotes = {'af': u'“”‘’',
'af-x-altquot': u'„”‚’',
'ca': u'«»“”',
@@ -400,38 +401,51 @@
'en-UK': u'‘’“”',
'eo': u'“”‘’',
'es': u'«»“”',
+ 'es-x-altquot': u'“”‘’',
'et': u'„“‚‘', # no secondary quote listed in
- 'et-x-altquot': u'»«›‹', # the sources above (wikipedia.org)
+ 'et-x-altquot': u'«»‹›', # the sources above (wikipedia.org)
'eu': u'«»‹›',
- 'es-x-altquot': u'“”‘’',
'fi': u'””’’',
- 'fi-x-altquot': u'»»’’',
+ 'fi-x-altquot': u'»»››',
'fr': (u'« ', u' »', u'‹ ', u' ›'), # with narrow no-break space
'fr-x-altquot': u'«»‹›', # for use with manually set spaces
- # 'fr-x-altquot': (u'“ ', u' ”', u'‘ ', u' ’'), # rarely used
+ # 'fr-x-altquot2': (u'“ ', u' ”', u'‘ ', u' ’'), # rarely used
'fr-CH': u'«»‹›',
'gl': u'«»“”',
'he': u'”“»«',
'he-x-altquot': u'„”‚’',
+ 'hr': u'„”‘’',
+ 'hr-x-altquot': u'»«›‹',
+ 'hsb': u'„“‚‘',
+ 'hsb-x-altquot':u'»«›‹',
+ 'hu': u'„”«»',
'it': u'«»“”',
'it-CH': u'«»‹›',
'it-x-altquot': u'“”‘’',
+ # 'it-x-altquot2': u'“„‘‚', # antiquated?
'ja': u'「」『』',
'lt': u'„“‚‘',
+ 'lv': u'„“‚‘',
'nl': u'“”‘’',
'nl-x-altquot': u'„”‚’',
+ # 'nl-x-altquot2': u'””’’',
'pl': u'„”«»',
'pl-x-altquot': u'«»“”',
'pt': u'«»“”',
'pt-BR': u'“”‘’',
'ro': u'„”«»',
- 'ro-x-altquot': u'«»„”',
'ru': u'«»„“',
'sk': u'„“‚‘',
'sk-x-altquot': u'»«›‹',
+ 'sl': u'„“‚‘',
+ 'sl-x-altquot': u'»«›‹',
'sv': u'””’’',
'sv-x-altquot': u'»»››',
- # 'sv-x-altquot': u'»«›‹',
+ 'tr': u'“”‘’',
+ 'tr-x-altquot': u'«»‹›',
+ # 'tr-x-altquot2': u'“„‘‚', # antiquated?
+ 'uk': u'«»„“',
+ 'uk-x-altquot': u'„“‚‘',
'zh-CN': u'“”‘’',
'zh-TW': u'「」『』',
}
Modified: trunk/docutils/test/test_transforms/test_smartquotes.py
===================================================================
--- trunk/docutils/test/test_transforms/test_smartquotes.py 2017-01-17 15:06:17 UTC (rev 8016)
+++ trunk/docutils/test/test_transforms/test_smartquotes.py 2017-01-17 20:02:50 UTC (rev 8017)
@@ -206,7 +206,7 @@
.. class:: language-ro
-Alternative Romanian "smart quotes" and 'single' smart quotes.
+Romanian "smart quotes" and 'secondary' smart quotes.
""",
u"""\
<document source="test data">
@@ -215,7 +215,7 @@
<paragraph classes="language-en-uk">
English “smart quotes” and ‘single smart quotes’ have no alternative.
<paragraph classes="language-ro">
- Alternative Romanian «smart quotes» and „single” smart quotes.
+ Romanian „smart quotes” and «secondary» smart quotes.
"""],
])
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-02-03 14:44:18
|
Revision: 8021
http://sourceforge.net/p/docutils/code/8021
Author: milde
Date: 2017-02-03 14:44:16 +0000 (Fri, 03 Feb 2017)
Log Message:
-----------
Provide default title in metadata.
In HTML5, <title> is a required child of the <head> element
and it must not be empty (found with https://validator.w3.org/check)
If there is no given title, we use the source file name as default and
fall back to "docutils document without title" if there is no source file.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/writers/_html_base.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2017-02-02 16:05:23 UTC (rev 8020)
+++ trunk/docutils/HISTORY.txt 2017-02-03 14:44:16 UTC (rev 8021)
@@ -38,13 +38,17 @@
- Added ``split_escaped_whitespace`` function, support for escaped
whitespace in URI contexts.
-* docutils/utils/smartquotes.py:
+* docutils/utils/smartquotes.py:
- Update quote definitions for languages et, fi, ro, sv, tr, uk.
- New quote definitions for hr, hsb, hu, lv, sl.
-
+
+* docutils/writers/_html_base.py
+
+ - Provide default title in metadata (required by HTML5).
+
* tools/rst2html4.py: New front-end.
-
+
* tools/dev/generate_punctuation_chars.py: New script
to test and update utils.punctuation_chars.
Modified: trunk/docutils/docutils/writers/_html_base.py
===================================================================
--- trunk/docutils/docutils/writers/_html_base.py 2017-02-02 16:05:23 UTC (rev 8020)
+++ trunk/docutils/docutils/writers/_html_base.py 2017-02-03 14:44:16 UTC (rev 8021)
@@ -691,8 +691,9 @@
self.body.append('\n</pre>\n')
def visit_document(self, node):
- self.head.append('<title>%s</title>\n'
- % self.encode(node.get('title', '')))
+ title = (node.get('title', '') or os.path.basename(node['source'])
+ or 'docutils document without title')
+ self.head.append('<title>%s</title>\n' % self.encode(title))
def depart_document(self, node):
self.head_prefix.extend([self.doctype,
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <go...@us...> - 2017-02-06 00:41:51
|
Revision: 8024
http://sourceforge.net/p/docutils/code/8024
Author: goodger
Date: 2017-02-06 00:41:48 +0000 (Mon, 06 Feb 2017)
Log Message:
-----------
Added support for escaped whitespace in URI contexts
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/parsers/rst/directives/__init__.py
trunk/docutils/test/test_parsers/test_rst/test_directives/test_images.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2017-02-05 10:29:18 UTC (rev 8023)
+++ trunk/docutils/HISTORY.txt 2017-02-06 00:41:48 UTC (rev 8024)
@@ -28,6 +28,10 @@
- Recognize non-ASCII whitespace around inline literal, target,
and substitution.
+* docutils/parsers/rst/directives/images.py:
+
+ - Added support for escaped whitespace in URI contexts.
+
* docutils/parsers/rst/directives/tables.py:
- Rework patch [ 120 ] (revert change to ``Table.get_column_widths()``
Modified: trunk/docutils/docutils/parsers/rst/directives/__init__.py
===================================================================
--- trunk/docutils/docutils/parsers/rst/directives/__init__.py 2017-02-05 10:29:18 UTC (rev 8023)
+++ trunk/docutils/docutils/parsers/rst/directives/__init__.py 2017-02-06 00:41:48 UTC (rev 8024)
@@ -13,6 +13,7 @@
import sys
from docutils import nodes
+from docutils.utils import split_escaped_whitespace, escape2null, unescape
from docutils.parsers.rst.languages import en as _fallback_language_module
if sys.version_info < (2,5):
from docutils._compat import __import__
@@ -189,7 +190,7 @@
def uri(argument):
"""
- Return the URI argument with whitespace removed.
+ Return the URI argument with unescaped whitespace removed.
(Directive option conversion function.)
Raise ``ValueError`` if no argument is found.
@@ -197,7 +198,8 @@
if argument is None:
raise ValueError('argument required but none supplied')
else:
- uri = ''.join(argument.split())
+ parts = split_escaped_whitespace(escape2null(argument))
+ uri = ' '.join(''.join(unescape(part).split()) for part in parts)
return uri
def nonnegative_int(argument):
Modified: trunk/docutils/test/test_parsers/test_rst/test_directives/test_images.py
===================================================================
--- trunk/docutils/test/test_parsers/test_rst/test_directives/test_images.py 2017-02-05 10:29:18 UTC (rev 8023)
+++ trunk/docutils/test/test_parsers/test_rst/test_directives/test_images.py 2017-02-06 00:41:48 UTC (rev 8024)
@@ -432,6 +432,16 @@
<image uri="test.png">
<target ids="uppercase" names="uppercase" refuri="http://docutils.sourceforge.net/">
"""],
+[r"""
+.. image:: path\ with\ spaces/name\ with\ spaces.png
+ :target: path\ with\ spaces/
+ target\ with\ spaces\ across\ lines.html
+""",
+"""\
+<document source="test data">
+ <reference refuri="path with spaces/target with spaces across lines.html">
+ <image uri="path with spaces/name with spaces.png">
+"""],
]
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-03-11 12:09:39
|
Revision: 8046
http://sourceforge.net/p/docutils/code/8046
Author: milde
Date: 2017-03-11 12:09:36 +0000 (Sat, 11 Mar 2017)
Log Message:
-----------
Cleanup and documentation update for latex writer class handling.
Replace the special casing for topic elements with the generic block-level
element wrapper.
Rework latex writer documentation.
Minor fixes to latex writers.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docs/user/latex.txt
trunk/docutils/docutils/writers/latex2e/__init__.py
trunk/docutils/docutils/writers/latex2e/xelatex.tex
trunk/docutils/docutils/writers/xetex/__init__.py
trunk/docutils/test/functional/expected/standalone_rst_latex.tex
trunk/docutils/test/functional/expected/standalone_rst_xetex.tex
trunk/docutils/test/functional/expected/xetex-cyrillic.tex
trunk/docutils/test/functional/input/data/custom_roles_latex.txt
trunk/docutils/test/functional/input/data/unicode.txt
trunk/docutils/test/functional/input/standalone_rst_latex.txt
trunk/docutils/test/functional/input/standalone_rst_xetex.txt
Added Paths:
-----------
trunk/docutils/test/functional/input/data/classes_latex.txt
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2017-03-07 13:06:08 UTC (rev 8045)
+++ trunk/docutils/HISTORY.txt 2017-03-11 12:09:36 UTC (rev 8046)
@@ -71,7 +71,8 @@
* docutils/writers/latex2e/__init__.py
- Handle class arguments for block-level elements by wrapping them
- in a "DUclass" environment.
+ in a "DUclass" environment. This replaces the special handling for
+ "epigraph" and "topic" elements.
* tools/rst2html4.py: New front-end.
Modified: trunk/docutils/docs/user/latex.txt
===================================================================
--- trunk/docutils/docs/user/latex.txt 2017-03-07 13:06:08 UTC (rev 8045)
+++ trunk/docutils/docs/user/latex.txt 2017-03-11 12:09:36 UTC (rev 8046)
@@ -7,18 +7,21 @@
:Revision: $Revision$
:Date: $Date$
:Copyright: This document has been placed in the public domain.
+:Abstract: This document covers topics specific to Docutils' LaTeX_ export.
.. contents::
.. sectnum::
-Introduction
-============
-This document covers topics specific to Docutils' LaTeX__ export. For
-an introduction to LaTeX see, e.g., `LaTeX2e for authors`_. There exists
-a wide selecton of `LaTeX Documentation on the net`_ and
-`books on LaTeX and related topics`_.
+LaTeX
+=====
+LaTeX__ is a document preparation system for high-quality typesetting. It
+is most often used for medium-to-large technical or scientific documents but
+it can be used for almost any form of publishing. There exists a wide
+selection of `LaTeX Documentation on the net`_ and `books on LaTeX and
+related topics`_. For an introduction to LaTeX see, e.g., `LaTeX2e for
+authors`_.
__ http://www.latex-project.org/
.. _LaTeX2e for authors:
@@ -28,28 +31,27 @@
.. _books on LaTeX and related topics:
http://www.latex-project.org/guides/books.html
-LaTeX
-=====
-Unlike HTML with CSS, LaTeX uses one common language for markup and
-style definitions. Separation of content and style is realized by
-collecting style definitions in the documentclass_, `LaTeX packages`_,
-or the document preamble.
+.. _LaTeX packages:
-LaTeX packages
---------------
+LaTeX classes and packages
+--------------------------
-LaTeX packages (similar to Python modules or C libraries) provide
-means to extend or modify the LaTeX language by redefining macros or
-providing new ones. There is a *huge* selection of packages (standard
-as well as user contributed) coming with your TeX distribution or
-available at CTAN_.
+Unlike HTML with CSS, LaTeX uses one common language for markup and style
+definitions. Separation of content and style is realized by collecting style
+definitions in LaTeX classes and packages, or the
+`document preamble <LaTeX preamble_>`_.
+LaTeX document classes and packages (similar to Python modules or C
+libraries) provide means to extend or modify the LaTeX language by
+redefining macros or providing new ones.
+
+Using the `document class`_ and `style sheet`_ configuration options, you
+can select from a *huge* selection of classes and packages (standard as well
+as user contributed) coming with your TeX distribution or available at
+CTAN_ as well as custom style sheets.
+
.. _CTAN: http://www.ctan.org
-.. _stylesheet:
- config.html#stylesheet-latex2e-writer
-.. _TeX input path:
- http://www.tex.ac.uk/cgi-bin/texfaq2html?label=what-TDS
Docutils specific LaTeX macros
@@ -74,9 +76,6 @@
See the test output standalone_rst_latex.tex_ for an example of the fallback
definitions and their use in the document.
-.. _standalone_rst_latex.tex:
- ../../test/functional/expected/standalone_rst_latex.tex
-
.. [#] DU for Documentation Utilities = Docutils
@@ -125,22 +124,13 @@
_`pdflatex`
Generates a PDF document directly from the LaTeX file.
-_`latex + dvipdfmx`
- Use ``latex`` to generate a DVI file and ``dvipdfmx`` to produce a PDF
- file. If you take this approach, add ``dvipdfmx`` to the
- _documentoptions.
-
-_`latex` + dvips + ps2pdf
- Produce a DVI file with ``latex``, postscript with ``dvips`` and PDF with
- ``ps2pdf``.
-
_`xelatex` or _`lualatex`
The `XeTeX`_ and LuaTeX_ engines work with input files in UTF-8 encoding
and system fonts. Export your document with the `xetex` writer
(``rst2xetex``), if you want to go this route.
-You need to call latex (or pdflatex/xelatex/lualatex) twice (or even three times) to
-get internal references correct.
+You may need to call latex two or three times to get internal references
+correct.
.. _documentoptions: config.html#documentoptions
.. _xetex: http://tug.org/xetex/
@@ -159,16 +149,8 @@
Configuration
=============
-The LaTeX code generation can be configured via
+.. contents:: :local:
-* configuration `options/settings`_ of the Docutils writer,
-* `LaTeX packages`_,
-* custom `LaTeX code`_ in
- + `style sheets`_,
- + the `LaTeX preamble`_,
- + the document body (`raw LaTeX`_), or
- + custom templates_.
-
.. _option:
Options/Settings
@@ -176,11 +158,13 @@
Options can be specified as
-* command-line options (run ``rst2latex.py --help`` to get a list of
- available options), or
+* command-line options, or
-* configuration settings (see `Docutils Configuration`_ for details).
+* configuration settings.
+Run ``rst2latex.py --help`` to get a list of available options;
+see `Docutils Configuration`_ for details.
+
.. _Docutils Configuration:
config.html
@@ -187,81 +171,65 @@
Classes
-------
-The `classes attribute`_ is one of the common attributes, shared by all
+The `"classes" attribute`_ is one of the common attributes, shared by all
Docutils elements.
-
- The purpose of the attribute is to indicate an “is-a” variant
- relationship, to allow an extensible way of defining sub-classes of
- existing elements. It can be used to carry context forward between a
- Docutils Reader and Writer, when a custom structure is reduced to a
- standardized document tree.
-
In HTML, the common use is to provide selection criteria for style rules in
CSS stylesheets. As there is no comparable framework for LaTeX, Docutils
mimics some of this behaviour via `Docutils specific LaTeX macros`_.
-* Role names and class arguments are converted to conform to the regular
- expression ``[a-z][-a-z0-9]*`` (lowercase letters, digits and hyphen, see
- `class directive`_).
+*Inline elements*
+ are handled via the ``\DUrole{}`` macro, that calls the optional styling
+ command ``\DUrole«classargument»`` with one argument (the role content).
+ See `custom interpreted text roles`_.
-* Class arguments may contain numbers and hyphens, which need special
- treatment in LaTeX command names. (The commands ``\begincsname`` and
- ``\endcsname`` or the special command ``\@namedef`` can
- help with the definition of corresponding macros or environments.)
+*Block level elements*
+ are wrapped in "class environments":
+ ``\begin{DUclass}`` calls the optional styling command
+ ``\DUCLASSe«classargument»{}``, ``\end{DUclass}`` tries
+ ``\endDUCLASS«classargument»``.
-* Elements can have multiple class arguments. In contrast to HTML/CSS, the
- order of the class arguments cannot be ignored in LaTeX
+Customization is done by defining matching macros or environments.
-For inline elements, classes are handled via the ``\DUrole{}`` macro,
-see `custom interpreted text roles`_.
+Example 1:
+ Use small caps font inside elements with class value "custom".
-Block level elements are wrapped in "class environments".
+ *Inline elements*
+ The LaTeX function ``\textsc`` sets the argument in small caps::
-* ``\begin{DUclass}{«classargument»}``
- calls the macro named ``\DUCLASSe«classargument»{}`` if it is defined and
- silently ignores this class argument if a corresponding macro is not
- defined.
+ \newcommand{\DUrolesmallcaps}[1]{\textsc{#1}}
-* ``\end{DUclass}{«classargument»}``
- tries ``\endDUCLASS«classargument»``
+ *Block-level elements*
+ The LaTeX directive (macro without argument) ``\scshape`` switches to
+ the small caps font. Its effect is confined to the wrapper ``DUclass``
+ environment::
-Customization is done by defining matching macros or environments.
+ \newcommand*{\DUCLASScustom}{\scshape}
-Examples:
-
-* Use small caps font inside elements with class value "custom"::
-
- \newcommand*{\DUCLASScustom}{\scshape}
-
- The LaTeX directive (macro without argument) ``\scshape`` switches to the
- small caps font. Its effect is confined to the wrapper ``DUclass``
- environment.
-
-* Turn bullet lists with class value "enumerateitems" into enumerated
+Example 2:
+ It is even possible to locally redefine other LaTeX macros, e.g. to
+ turn bullet lists with class value "enumerateitems" into enumerated
lists::
- \newcommand*{\DUCLASSenumerateitems}{%
- \renewenvironment{itemize}{\begin{enumerate}}%
- {\end{enumerate}}%
- }
+ \newcommand*{\DUCLASSenumerateitems}{%
+ \renewenvironment{itemize}{\begin{enumerate}}%
+ {\end{enumerate}}%
+ }
- The matching LaTeX macro can also locally redefine other LaTeX macros.
-* Draw a line below elements with class value "rulebelow"::
+Class arguments may contain numbers and hyphens, which need special
+treatment in LaTeX command names (see `class directive`_). The commands
+``\begincsname`` and ``\endcsname`` or the special command ``\@namedef`` can
+help with the definition of corresponding macros or environments.
- \newenvironment{DUCLASSrulebelow}%
- {} % nothing to do at start of element
- {\noindent\rule[0.5ex]{1\columnwidth}{1pt}}
+* Elements can have multiple class arguments. In contrast to HTML/CSS, the
+ order of the class arguments cannot be ignored in LaTeX
- The new LaTeX environment executes the rule-drawing macro when leaving the
- wrapper ``DUclass`` environment.
-
For "historical reasons", class handling differs for some elements and class
values:
* The special macros ``\DUadmonition``, ``\DUtitle``, and ``\DUtopic`` are
- handed a comma separated list of class values as optional argument. See
- the sections on admonitions_, titles_, and the `topic element`_ for
+ written with a comma separated list of class values as optional argument.
+ See the sections on admonitions_, titles_, and the `topic element`_ for
customization examples.
* Class argument values starting with ``align-`` are transformed to "align"
@@ -270,10 +238,9 @@
* The table element recognizes some special class values. See section table_.
+.. _"classes" attribute: ../ref/doctree.html#classes
-.. _classes attribute: ../ref/doctree.html#classes
-
LaTeX code
----------
@@ -280,6 +247,21 @@
Custom LaTeX code can be placed in `style sheets`_, the
`LaTeX preamble`_, the document body (`raw LaTeX`_), or custom templates_.
+The functional tests that come with Docutils, can serve as example.
+
+input:
+ standalone_rst_latex.txt_ (includes files from `tests/functional/input/data`_)
+expected output:
+ standalone_rst_latex.tex_
+
+.. _standalone_rst_latex.txt:
+ https://sf.net/p/docutils/code/HEAD/tree/trunk/docutils/test/functional/input/standalone_rst_latex.txt
+.. _tests/functional/input/data:
+ https://sf.net/p/docutils/code/HEAD/tree/trunk/docutils/test/functional/input/data
+.. _standalone_rst_latex.tex:
+ https://sf.net/p/docutils/code/HEAD/tree/trunk/docutils/test/functional/expected/standalone_rst_latex.tex?format=raw
+
+
.. _style sheet:
.. _custom style sheets:
@@ -295,7 +277,7 @@
stylesheet_
It is possible to specify multiple style sheets and mix `LaTeX
- packages`_ with `custom style sheets`_.
+ packages`_ with custom style sheets.
You cannot specify package options with the stylesheet_ setting. If
you need to pass options to the package, use the ``\usepackage``
@@ -326,6 +308,8 @@
.. _clsguide:
http://mirror.ctan.org/macros/latex/doc/clsguide.pdf
+.. _stylesheet:
+ config.html#stylesheet-latex2e-writer
.. _embed-stylesheet:
config.html#embed-stylesheet-latex2e-writer
.. _repository of user-contributed style sheets:
@@ -401,7 +385,8 @@
\[x^3 + 3x^2a + 3xa^2 + a^3,\]
- (Drawback: the formula will be invisible in other output formats.)
+ (Drawback: the formula will be invisible in other output formats. Better
+ use the `math directive`_)
Most LaTeX code examples also work as raw LaTeX inside the document.
An exception are commands that need to be given in the document
@@ -423,6 +408,8 @@
.. _raw directive:
../ref/rst/directives.html#raw
+.. _math directive:
+ ../ref/rst/directives.html#math
@@ -492,12 +479,13 @@
Commands:
``\DUrole``: dispatcher command
- ``\DUrole«classargument»``: optional styling command
+ ``\DUrole«classargument»``: optional styling command with 1 argument (the
+ role content).
Default:
- The definition of ``\DUrole{«classargument»}{}`` calls the macro named
- ``\DUrole«classargument»{}`` if it is defined and silently ignores
- this class argument if a corresponding macro is not defined.
+ The default definition of ``\DUrole{«classargument»}{}`` calls the macro
+ named ``\DUrole«classargument»{}`` if it is defined and silently ignores
+ this class argument if not.
Example 1:
Typeset text in small caps::
@@ -1289,6 +1277,27 @@
http://mirror.ctan.org/macros/latex/contrib/listings/listings.pdf
+lists
+-----
+
+Remove extra vertical whitespace between items of bullet lists and
+enumerated lists.
+
+Example:
+ Pass the class argument "compact" to the list::
+
+ .. class:: compact
+
+ * first item
+ * second item
+
+ The following lines for the `LaTeX preamble`_ use the enumitem_ package to
+ remove spacing from all lists with class argument "compact"::
+
+ \usepackage{enumitem}
+ \newcommand*{\DUCLASScompact}{\setlist{noitemsep}}
+
+
list of figures/tables
----------------------
@@ -1481,7 +1490,7 @@
Indent the first line in a paragraph unless it is the first line of a
chapter, section, subsection, or subsubsection.
-Example:
+Example 1:
To set paragraph indentation to zero but add a vertical space between
load the `parskip` package with the command line argument::
@@ -1491,7 +1500,18 @@
\usepackage{parskip}
+Example 2:
+ To suppress the indentation of a specific paragraph, you may give it the
+ class "noindent" with, e.g. ::
+ .. class:: noindent
+
+ This paragraph should not be indented.
+
+ and define the `custom role`_ command::
+
+ \newcommand{\DUrolenoindent}[1]{\noindent #1}
+
rubric
------
@@ -1592,35 +1612,17 @@
topic element
-------------
-A topic_ is like a block quote with a title, or a self-contained section with
-no subsections.
+A topic_ is like a block quote with a title, or a self-contained section
+with no subsections. Topics and rubrics can be used at places where a
+`section title`_ is not allowed (e.g. inside a directive).
-Topics and rubrics can be used at places where a `section title`_ is not
-allowed (e.g. inside a directive).
+Example:
+ Use a standard paragraph for a topic::
-Command:
- ``DUtopic``
-
-Default:
- "quote" environment
-
-Example 1:
- If you generally prefer a "normal" section over a block quote, define::
-
- \newcommand{\DUtopic}[2][class-arg]{%
- \ifcsname DUtopic#1\endcsname%
- \csname DUtopic#1\endcsname{#2}%
- \else
- #2
- \fi
+ \newcommand{\DUCLASStopic}{%
+ \renewenvironment{quote}{}{}%
}
-Example 2:
- If you want a "normal" section for topics with class argument "noquote",
- define::
-
- \newcommand{\DUtopicnoquote}[1]{#1}
-
.. _topic: ../ref/rst/directives.html#topic
.. _section title: ../ref/rst/restructuredtext.html#sections
@@ -1860,14 +1862,12 @@
a) compile with pdflatex_,
- b) compile with `latex + dvipdfmx`_,
+ b) use the package breakurl_,
- c) use the package breakurl_,
+ c) (for printout) `disable hyperlinks`_ using the package "nohyperref".
- d) (for printout) `disable hyperlinks`_ using the package "nohyperref".
+See also the `Link text doesn’t break at end line`_ LaTeX FAQ entry.
-See also the `Link text doesn’t break at end line`_ FAQ entry.
-
.. _breakurl:
http://mirror.ctan.org/help/Catalogue/entries/breakurl.html
@@ -1893,24 +1893,9 @@
"latin1"). Newer versions of hyperref default to "unicode=true" if the
document language is "russian".
-However, this setting leads to "strange" characters in the bookmarks
-if used with xelatex_ in hyperref versions before v6.79g (2009/11/20).
-(cf `bugreport 3100778`__).
+.. _hyperref_option: config.html#stylesheet-latex2e-writer
-If updating the hyperref package is not an option, the workaround is
-to set ::
- --hyperref-option="unicode=false"
-
-or (in the config file)::
-
- [xetex writer]
-
- hyperref-option: unicode=false
-
-__ http://sourceforge.net/tracker/?func=detail&aid=3100778&group_id=38414&atid=422030
-.. _hyperref_option: config.html#stylesheet-latex2e-writer
-
image inclusion
```````````````
@@ -1917,10 +1902,10 @@
Images__ are included in LaTeX with the help of the `graphicx` package. The
supported file formats depend on the used driver:
+* pdflatex_ and xelatex_ work with PNG, JPG, or PDF, but **not EPS**.
* Standard latex_ can include **only EPS** graphics, no other format.
-* `latex + dvipdfmx`_ works with EPS and JPG (add 'dvipdfmx' to the
+* latex + dvipdfmx works with EPS and JPG (add 'dvipdfmx' to the
documentoptions_ and 'bmpsize' to the stylesheet_ setting).
-* pdflatex_ and xelatex_ work with PNG, JPG, or PDF, but **not EPS**.
If PDF-image inclusion in PDF files fails, specifying
``--graphicx-option=pdftex`` or ``--graphicx-option=auto`` might help.
@@ -2038,7 +2023,7 @@
.. _Docutils TODO list: ../dev/todo.html#latex-writer
.. _bugs: ../../BUGS.html
.. _SourceForge Bug Tracker:
- http://sourceforge.net/tracker/?group_id=38414&atid=422030
+ http://sf.net/tracker/?group_id=38414&atid=422030
Footnotes and citations
Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py 2017-03-07 13:06:08 UTC (rev 8045)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py 2017-03-11 12:09:36 UTC (rev 8046)
@@ -476,7 +476,7 @@
PreambleCmds.abstract = r"""
% abstract title
-\providecommand*{\DUtitleabstract}[1]{\centering\textbf{#1}}"""
+\providecommand*{\DUtitleabstract}[1]{\centerline{\textbf{#1}}}"""
PreambleCmds.admonition = r"""
% admonition (specially marked topic)
@@ -504,7 +504,9 @@
PreambleCmds.dedication = r"""
% dedication topic
-\providecommand{\DUtopicdedication}[1]{\begin{center}#1\end{center}}"""
+\providecommand*{\DUCLASSdedication}{%
+ \renewenvironment{quote}{\begin{center}}{\end{center}}%
+}"""
PreambleCmds.duclass = r"""
% class handling for environments (block-level elements)
@@ -520,7 +522,6 @@
PreambleCmds.error = r"""
% error admonition title
\providecommand*{\DUtitleerror}[1]{\DUtitle{\color{red}#1}}"""
-# PreambleCmds.errortitle._depends = 'color'
PreambleCmds.fieldlist = r"""
% fieldlist environment
@@ -674,16 +675,6 @@
\fi
}"""
-PreambleCmds.topic = r"""
-% topic (quote with heading)
-\providecommand{\DUtopic}[2][class-arg]{%
- \ifcsname DUtopic#1\endcsname%
- \csname DUtopic#1\endcsname{#2}%
- \else
- \begin{quote}#2\end{quote}
- \fi
-}"""
-
PreambleCmds.transition = r"""
% transition (break, fancybreak, anonymous section)
\providecommand*{\DUtransition}{%
@@ -2441,8 +2432,8 @@
if isinstance(node.parent, nodes.line_block):
self.out.append('\\item[]\n'
'\\begin{DUlineblock}{\\DUlineblockindent}\n')
+ # nested line-blocks cannot be given class arguments
else:
- # nested line-blocks cannot be given class arguments
self.duclass_open(node)
self.out.append('\\begin{DUlineblock}{0em}\n')
self.insert_align_declaration(node)
@@ -3096,7 +3087,6 @@
depth = node.get('depth', 0)
if 'local' in node['classes']:
self.minitoc(node, title, depth)
- self.context.append('')
return
if depth:
self.out.append('\\setcounter{tocdepth}{%d}\n' % depth)
@@ -3108,29 +3098,31 @@
else: # Docutils generated contents list
# set flag for visit_bullet_list() and visit_title()
self.is_toc_list = True
- self.context.append('')
elif ('abstract' in node['classes'] and
self.settings.use_latex_abstract):
self.push_output_collector(self.abstract)
self.out.append('\\begin{abstract}')
- self.context.append('\\end{abstract}\n')
if isinstance(node.next_node(), nodes.title):
node.pop(0) # LaTeX provides its own title
else:
- self.fallbacks['topic'] = PreambleCmds.topic
# special topics:
if 'abstract' in node['classes']:
self.fallbacks['abstract'] = PreambleCmds.abstract
self.push_output_collector(self.abstract)
- if 'dedication' in node['classes']:
+ elif 'dedication' in node['classes']:
self.fallbacks['dedication'] = PreambleCmds.dedication
self.push_output_collector(self.dedication)
- self.out.append('\n\\DUtopic[%s]{\n' % ','.join(node['classes']))
- self.context.append('}\n')
+ else:
+ node['classes'].insert(0, 'topic')
+ self.visit_block_quote(node)
def depart_topic(self, node):
- self.out.append(self.context.pop())
self.is_toc_list = False
+ if ('abstract' in node['classes']
+ and self.settings.use_latex_abstract):
+ self.out.append('\\end{abstract}\n')
+ elif not 'contents' in node['classes']:
+ self.depart_block_quote(node)
if ('abstract' in node['classes'] or
'dedication' in node['classes']):
self.pop_output_collector()
Modified: trunk/docutils/docutils/writers/latex2e/xelatex.tex
===================================================================
--- trunk/docutils/docutils/writers/latex2e/xelatex.tex 2017-03-07 13:06:08 UTC (rev 8045)
+++ trunk/docutils/docutils/writers/latex2e/xelatex.tex 2017-03-11 12:09:36 UTC (rev 8046)
@@ -4,8 +4,8 @@
% \defaultfontfeatures{Scale=MatchLowercase}
% straight double quotes (defined T1 but missing in TU):
\ifdefined \UnicodeEncodingName
- \ProvideTextCommand{\textquotedbl}{\UnicodeEncodingName}{%
- {\addfontfeatures{Ligatures=ResetAll}\char"0022}}
+ \DeclareTextCommand{\textquotedbl}{\UnicodeEncodingName}{%
+ {\addfontfeatures{RawFeature=-tlig,Mapping=}\char34}}%
\fi
$requirements
%%% Custom LaTeX preamble
Modified: trunk/docutils/docutils/writers/xetex/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/xetex/__init__.py 2017-03-07 13:06:08 UTC (rev 8045)
+++ trunk/docutils/docutils/writers/xetex/__init__.py 2017-03-11 12:09:36 UTC (rev 8046)
@@ -99,6 +99,11 @@
for key in ('af', # 'afrikaans',
'de-AT', # 'naustrian',
'de-AT-1901', # 'austrian',
+ # TODO: use variant=... for English variants
+ 'en-CA', # 'canadian',
+ 'en-GB', # 'british',
+ 'en-NZ', # 'newzealand',
+ 'en-US', # 'american',
'fr-CA', # 'canadien',
'grc-ibycus', # 'ibycus', (Greek Ibycus encoding)
'sr-Latn', # 'serbian script=latin'
@@ -110,7 +115,7 @@
self.reporter = reporter
self.language = self.language_name(language_code)
self.otherlanguages = {}
- self.warn_msg = 'Language "%s" not supported by XeTeX (polyglossia).'
+ self.warn_msg = 'Language "%s" not supported by Polyglossia.'
self.quote_index = 0
self.quotes = ('"', '"')
# language dependent configuration:
Modified: trunk/docutils/test/functional/expected/standalone_rst_latex.tex
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_latex.tex 2017-03-07 13:06:08 UTC (rev 8045)
+++ trunk/docutils/test/functional/expected/standalone_rst_latex.tex 2017-03-11 12:09:36 UTC (rev 8046)
@@ -52,7 +52,7 @@
}
% abstract title
-\providecommand*{\DUtitleabstract}[1]{\centering\textbf{#1}}
+\providecommand*{\DUtitleabstract}[1]{\centerline{\textbf{#1}}}
% admonition (specially marked topic)
\providecommand{\DUadmonition}[2][class-arg]{%
@@ -67,7 +67,9 @@
}
% dedication topic
-\providecommand{\DUtopicdedication}[1]{\begin{center}#1\end{center}}
+\providecommand*{\DUCLASSdedication}{%
+ \renewenvironment{quote}{\begin{center}}{\end{center}}%
+}
% docinfo (width of docinfo table)
\DUprovidelength{\DUdocinfowidth}{0.9\linewidth}
@@ -172,15 +174,6 @@
% titlereference role
\providecommand*{\DUroletitlereference}[1]{\textsl{#1}}
-% topic (quote with heading)
-\providecommand{\DUtopic}[2][class-arg]{%
- \ifcsname DUtopic#1\endcsname%
- \csname DUtopic#1\endcsname{#2}%
- \else
- \begin{quote}#2\end{quote}
- \fi
-}
-
% transition (break, fancybreak, anonymous section)
\providecommand*{\DUtransition}{%
\hspace*{\fill}\hrulefill\hspace*{\fill}
@@ -261,19 +254,25 @@
\end{tabularx}
\end{center}
-\DUtopic[dedication]{
+\begin{DUclass}{dedication}
+\begin{quote}
\DUtitle[dedication]{Dedication}
For Docutils users \& co-developers.
-}
-\DUtopic[abstract]{
+\end{quote}
+\end{DUclass}
+
+\begin{DUclass}{abstract}
+\begin{quote}
\DUtitle[abstract]{Abstract}
This is a test document, containing at least one example of each
reStructuredText construct.
-}
+\end{quote}
+\end{DUclass}
+
% This is a comment. Note how any initial comments are moved by
% transforms to after the document title, subtitle, and docinfo.
@@ -1252,12 +1251,15 @@
A \emph{topic} is like a block quote with a title, or a self-contained section
with no subsections.
-\DUtopic[]{
-\DUtitle[title]{Topic Title}
+\begin{DUclass}{topic}
+\begin{quote}
+\DUtitle[topic]{Topic Title}
This is a topic.
-}
+\end{quote}
+\end{DUclass}
+
A \emph{rubric} is like an informal heading that doesn’t correspond to the
document’s structure. It is typically highlighted in red (hence the name).
@@ -1855,7 +1857,8 @@
Class arguments may contain numbers and hyphens, which need special
treatment in LaTeX command names.
-\DUrole{large}{\DUrole{custom4}{\DUrole{small-caps}{\DUrole{custom-role}{\DUrole{custom-role}{Interpreted Text}}}}}
+\DUrole{large}{\DUrole{custom4}{\DUrole{small-caps}{\DUrole{custom-role}{\DUrole{custom-role}{Text with role “custom4”}}}}} (but without styling by \texttt{DUrole*}
+macros).
\item With LaTeX, roles can be styled within the document using the \DUroletitlereference{raw}
directive.
@@ -1866,7 +1869,7 @@
\@namedef{DUrolecustom4}{\textbf}
\makeatother
-\DUrole{large}{\DUrole{custom4}{\DUrole{small-caps}{\DUrole{custom-role}{\DUrole{custom-role}{Interpreted Text}}}}} in large, bold, small-caps.
+\DUrole{large}{\DUrole{custom4}{\DUrole{small-caps}{\DUrole{custom-role}{\DUrole{custom-role}{inline text}}}}} in large, bold, small-caps.
\item Custom roles can be based on standard roles:
@@ -1885,7 +1888,119 @@
\end{itemize}
-\subsection{3.2~~~More Tables%
+\subsection{3.2~~~class handling%
+ \label{class-handling}%
+}
+
+This section tests class handling for block level elements by the LaTeX
+writer. See the input file \texttt{classes\_latex.txt} for the raw LaTeX code used
+to style the examples.
+
+An “epigraph” directive is exported as “quote” wrapped in a “DUclass”
+environment. Here, it is styled by a “DUCLASSepigraph” environment
+redefining the “quote” environment as “minipage”:
+
+\newcommand*{\DUCLASSepigraph}{%
+ \renewenvironment{quote}{\vspace{1em}
+ \footnotesize\hfill{}%
+ \begin{minipage}{0.4\columnwidth}}%
+ {\end{minipage}\vskip\baselineskip}}
+
+\begin{DUclass}{epigraph}
+\begin{quote}
+
+Do not play this piece fast. It is never right to play \emph{Ragtime} fast.
+\nopagebreak
+
+\raggedleft —Scott Joplin
+
+\end{quote}
+\end{DUclass}
+
+Raw latex is also used to style the following lists: “DUCLASSenumerateitems”
+redefines “itemize” as “enumerate”, “DUCLASSrules” draws horizontal lines
+above and below.
+
+\newcommand*{\DUCLASSenumerateitems}{%
+ \renewenvironment{itemize}{\begin{enumerate}}%
+ {\end{enumerate}}%
+}
+
+\newenvironment{DUCLASSrules}%
+ {\noindent\rule[0.5ex]{1\columnwidth}{1pt}}%
+ {\noindent\rule[0.5ex]{1\columnwidth}{1pt...
[truncated message content] |
|
From: <mi...@us...> - 2017-03-13 21:49:20
|
Revision: 8048
http://sourceforge.net/p/docutils/code/8048
Author: milde
Date: 2017-03-13 21:49:17 +0000 (Mon, 13 Mar 2017)
Log Message:
-----------
Remove unnecessary empty lines from the latex output.
This is a cosmetic change without influence on the result of the latex
compilation.
Modified Paths:
--------------
trunk/docutils/docutils/writers/latex2e/__init__.py
trunk/docutils/docutils/writers/latex2e/default.tex
trunk/docutils/test/functional/expected/cyrillic.tex
trunk/docutils/test/functional/expected/latex_babel.tex
trunk/docutils/test/functional/expected/latex_cornercases.tex
trunk/docutils/test/functional/expected/latex_docinfo.tex
trunk/docutils/test/functional/expected/standalone_rst_latex.tex
trunk/docutils/test/functional/expected/standalone_rst_xetex.tex
trunk/docutils/test/test_writers/test_latex2e.py
Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py 2017-03-12 15:59:37 UTC (rev 8047)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py 2017-03-13 21:49:17 UTC (rev 8048)
@@ -660,6 +660,12 @@
PreambleCmds.textcomp = """\
\\usepackage{textcomp} % text symbol macros"""
+PreambleCmds.textsubscript = r"""
+% text mode subscript
+\ifx\textsubscript\undefined
+ \usepackage{fixltx2e} % since 2015 loaded by default
+\fi"""
+
PreambleCmds.titlereference = r"""
% titlereference role
\providecommand*{\DUroletitlereference}[1]{\textsl{#1}}"""
@@ -1655,7 +1661,7 @@
# strip the generic 'admonition' from the list of classes
node['classes'] = [cls for cls in node['classes']
if cls != 'admonition']
- self.out.append('\n\\DUadmonition[%s]{\n' % ','.join(node['classes']))
+ self.out.append('\n\\DUadmonition[%s]{' % ','.join(node['classes']))
def depart_admonition(self, node=None):
self.out.append('}\n')
@@ -1675,24 +1681,24 @@
def visit_block_quote(self, node):
self.duclass_open(node)
- self.out.append( '\\begin{quote}\n')
+ self.out.append( '\\begin{quote}')
def depart_block_quote(self, node):
- self.out.append( '\n\\end{quote}\n')
+ self.out.append( '\\end{quote}\n')
self.duclass_close(node)
def visit_bullet_list(self, node):
self.duclass_open(node)
if self.is_toc_list:
- self.out.append( '\\begin{list}{}{}\n' )
+ self.out.append( '\\begin{list}{}{}' )
else:
- self.out.append( '\\begin{itemize}\n' )
+ self.out.append( '\\begin{itemize}' )
def depart_bullet_list(self, node):
if self.is_toc_list:
- self.out.append( '\n\\end{list}\n' )
+ self.out.append( '\\end{list}\n' )
else:
- self.out.append( '\n\\end{itemize}\n' )
+ self.out.append( '\\end{itemize}\n' )
self.duclass_close(node)
def visit_superscript(self, node):
@@ -1706,7 +1712,8 @@
self.out.append('}')
def visit_subscript(self, node):
- self.out.append(r'\textsubscript{') # requires `fixltx2e`
+ self.fallbacks['textsubscript'] = PreambleCmds.textsubscript
+ self.out.append(r'\textsubscript{')
if node['classes']:
self.visit_inline(node)
@@ -1792,7 +1799,7 @@
self.out.append( '(\\textbf{' )
def depart_classifier(self, node):
- self.out.append( '})\n' )
+ self.out.append( '})' )
def visit_colspec(self, node):
self.active_table.visit_colspec(node)
@@ -2126,18 +2133,18 @@
self.duclass_open(node)
if enumeration_level <= 4:
- self.out.append('\\begin{enumerate}\n')
+ self.out.append('\\begin{enumerate}')
if (prefix, enumtype, suffix
) != labels[enumeration_level-1]:
- self.out.append('\\renewcommand{\\label%s}{%s}\n' %
+ self.out.append('\n\\renewcommand{\\label%s}{%s}' %
(counter_name, label))
else:
self.fallbacks[counter_name] = '\\newcounter{%s}' % counter_name
self.out.append('\\begin{list}')
self.out.append('{%s}' % label)
- self.out.append('{\\usecounter{%s}}\n' % counter_name)
+ self.out.append('{\\usecounter{%s}}' % counter_name)
if 'start' in node:
- self.out.append('\\setcounter{%s}{%d}\n' %
+ self.out.append('\n\\setcounter{%s}{%d}' %
(counter_name,node['start']-1))
@@ -2154,7 +2161,7 @@
pass
def depart_field(self, node):
- self.out.append('\n')
+ pass
##self.out.append('%[depart_field]\n')
def visit_field_argument(self, node):
@@ -2168,13 +2175,13 @@
def depart_field_body(self, node):
if self.out is self.docinfo:
- self.out.append(r'\\')
+ self.out.append(r'\\'+'\n')
def visit_field_list(self, node):
self.duclass_open(node)
if self.out is not self.docinfo:
self.fallbacks['fieldlist'] = PreambleCmds.fieldlist
- self.out.append('\\begin{DUfieldlist}\n')
+ self.out.append('\\begin{DUfieldlist}')
def depart_field_list(self, node):
if self.out is not self.docinfo:
@@ -2187,7 +2194,7 @@
else:
# Commands with optional args inside an optional arg must be put
# in a group, e.g. ``\item[{\hyperref[label]{text}}]``.
- self.out.append('\\item[{')
+ self.out.append('\n\\item[{')
def depart_field_name(self, node):
if self.out is self.docinfo:
@@ -2618,10 +2625,10 @@
self.fallbacks['_providelength'] = PreambleCmds.providelength
self.fallbacks['optionlist'] = PreambleCmds.optionlist
self.duclass_open(node)
- self.out.append('\\begin{DUoptionlist}\n')
+ self.out.append('\\begin{DUoptionlist}')
def depart_option_list(self, node):
- self.out.append('\n\\end{DUoptionlist}\n')
+ self.out.append('\\end{DUoptionlist}\n')
self.duclass_close(node)
def visit_option_list_item(self, node):
@@ -2784,7 +2791,7 @@
self.duclass_open(node)
self.requirements['color'] = PreambleCmds.color
self.fallbacks['sidebar'] = PreambleCmds.sidebar
- self.out.append('\\DUsidebar{\n')
+ self.out.append('\\DUsidebar{')
def depart_sidebar(self, node):
self.out.append('}\n')
@@ -2850,7 +2857,7 @@
self.fallbacks['title'] = PreambleCmds.title
node['classes'] = ['system-message']
self.visit_admonition(node)
- self.out.append('\\DUtitle[system-message]{system-message}\n')
+ self.out.append('\n\\DUtitle[system-message]{system-message}\n')
self.append_hypertargets(node)
try:
line = ', line~%s' % node['line']
@@ -2989,7 +2996,7 @@
classes = ','.join(node.parent['classes'])
if not classes:
classes = node.tagname
- self.out.append('\\DUtitle[%s]{' % classes)
+ self.out.append('\n\\DUtitle[%s]{' % classes)
self.context.append('}\n')
# Table caption
elif isinstance(node.parent, nodes.table):
@@ -3075,7 +3082,7 @@
self.out += self.ids_to_labels(node)
# add contents to PDF bookmarks sidebar
if isinstance(node.next_node(), nodes.title):
- self.out.append('\n\\pdfbookmark[%d]{%s}{%s}\n' %
+ self.out.append('\n\\pdfbookmark[%d]{%s}{%s}' %
(self.section_level+1,
node.next_node().astext(),
node.get('ids', ['contents'])[0]
@@ -3091,9 +3098,9 @@
if depth:
self.out.append('\\setcounter{tocdepth}{%d}\n' % depth)
if title != 'Contents':
- self.out.append('\\renewcommand{\\contentsname}{%s}\n' %
+ self.out.append('\n\\renewcommand{\\contentsname}{%s}' %
title)
- self.out.append('\\tableofcontents\n\n')
+ self.out.append('\n\\tableofcontents\n')
self.has_latex_toc = True
else: # Docutils generated contents list
# set flag for visit_bullet_list() and visit_title()
@@ -3129,10 +3136,8 @@
def visit_transition(self, node):
self.fallbacks['transition'] = PreambleCmds.transition
- self.out.append('\n\n')
- self.out.append('%' + '_' * 75 + '\n')
- self.out.append(r'\DUtransition')
- self.out.append('\n\n')
+ self.out.append('\n%' + '_' * 75 + '\n')
+ self.out.append('\\DUtransition\n')
def depart_transition(self, node):
pass
Modified: trunk/docutils/docutils/writers/latex2e/default.tex
===================================================================
--- trunk/docutils/docutils/writers/latex2e/default.tex 2017-03-12 15:59:37 UTC (rev 8047)
+++ trunk/docutils/docutils/writers/latex2e/default.tex 2017-03-13 21:49:17 UTC (rev 8048)
@@ -1,7 +1,4 @@
$head_prefix% generated by Docutils <http://docutils.sourceforge.net/>
-\ifx\textsubscript\undefined
- \usepackage{fixltx2e} % since 2015 loaded by default
-\fi
\usepackage{cmap} % fix search and cut-and-paste in Acrobat
$requirements
%%% Custom LaTeX preamble
Modified: trunk/docutils/test/functional/expected/cyrillic.tex
===================================================================
--- trunk/docutils/test/functional/expected/cyrillic.tex 2017-03-12 15:59:37 UTC (rev 8047)
+++ trunk/docutils/test/functional/expected/cyrillic.tex 2017-03-13 21:49:17 UTC (rev 8048)
@@ -1,8 +1,5 @@
\documentclass[a4paper,russian]{article}
% generated by Docutils <http://docutils.sourceforge.net/>
-\ifx\textsubscript\undefined
- \usepackage{fixltx2e} % since 2015 loaded by default
-\fi
\usepackage{cmap} % fix search and cut-and-paste in Acrobat
\usepackage{ifthen}
\usepackage[T1,T2A]{fontenc}
Modified: trunk/docutils/test/functional/expected/latex_babel.tex
===================================================================
--- trunk/docutils/test/functional/expected/latex_babel.tex 2017-03-12 15:59:37 UTC (rev 8047)
+++ trunk/docutils/test/functional/expected/latex_babel.tex 2017-03-13 21:49:17 UTC (rev 8048)
@@ -1,8 +1,5 @@
\documentclass[a4paper]{article}
% generated by Docutils <http://docutils.sourceforge.net/>
-\ifx\textsubscript\undefined
- \usepackage{fixltx2e} % since 2015 loaded by default
-\fi
\usepackage{cmap} % fix search and cut-and-paste in Acrobat
\usepackage{ifthen}
\usepackage[T1]{fontenc}
Modified: trunk/docutils/test/functional/expected/latex_cornercases.tex
===================================================================
--- trunk/docutils/test/functional/expected/latex_cornercases.tex 2017-03-12 15:59:37 UTC (rev 8047)
+++ trunk/docutils/test/functional/expected/latex_cornercases.tex 2017-03-13 21:49:17 UTC (rev 8048)
@@ -1,8 +1,5 @@
\documentclass[a4paper]{article}
% generated by Docutils <http://docutils.sourceforge.net/>
-\ifx\textsubscript\undefined
- \usepackage{fixltx2e} % since 2015 loaded by default
-\fi
\usepackage{cmap} % fix search and cut-and-paste in Acrobat
\usepackage{ifthen}
\usepackage[T1]{fontenc}
@@ -60,7 +57,6 @@
}
\begin{quote}
-
This block quote comes directly after the section heading and is
followed by a paragraph.
@@ -70,23 +66,18 @@
\nopagebreak
\raggedleft —Attribution
-
\end{quote}
This is a paragraph.
\begin{quote}
-
This block quote does not have an attribution.
-
\end{quote}
This is another paragraph.
\begin{quote}
-
Another block quote at the end of the section.
-
\end{quote}
@@ -95,20 +86,14 @@
}
\begin{quote}
-
Block quote followed by a transition.
-
\end{quote}
-
%___________________________________________________________________________
\DUtransition
-
\begin{quote}
-
Another block quote.
-
\end{quote}
Modified: trunk/docutils/test/functional/expected/latex_docinfo.tex
===================================================================
--- trunk/docutils/test/functional/expected/latex_docinfo.tex 2017-03-12 15:59:37 UTC (rev 8047)
+++ trunk/docutils/test/functional/expected/latex_docinfo.tex 2017-03-13 21:49:17 UTC (rev 8048)
@@ -1,8 +1,5 @@
\documentclass[a4paper]{article}
% generated by Docutils <http://docutils.sourceforge.net/>
-\ifx\textsubscript\undefined
- \usepackage{fixltx2e} % since 2015 loaded by default
-\fi
\usepackage{cmap} % fix search and cut-and-paste in Acrobat
\usepackage{ifthen}
\usepackage[T1]{fontenc}
Modified: trunk/docutils/test/functional/expected/standalone_rst_latex.tex
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_latex.tex 2017-03-12 15:59:37 UTC (rev 8047)
+++ trunk/docutils/test/functional/expected/standalone_rst_latex.tex 2017-03-13 21:49:17 UTC (rev 8048)
@@ -1,8 +1,5 @@
\documentclass[a4paper]{article}
% generated by Docutils <http://docutils.sourceforge.net/>
-\ifx\textsubscript\undefined
- \usepackage{fixltx2e} % since 2015 loaded by default
-\fi
\usepackage{cmap} % fix search and cut-and-paste in Acrobat
\usepackage{ifthen}
\usepackage[T1]{fontenc}
@@ -161,6 +158,11 @@
% subtitle (for topic/sidebar)
\providecommand*{\DUsubtitle}[1]{\par\emph{#1}\smallskip}
+% text mode subscript
+\ifx\textsubscript\undefined
+ \usepackage{fixltx2e} % since 2015 loaded by default
+\fi
+
% title for topics, admonitions, unsupported section levels, and sidebar
\providecommand*{\DUtitle}[2][class-arg]{%
% call \DUtitle#1{#2} if it exists:
@@ -259,7 +261,6 @@
\DUtitle[dedication]{Dedication}
For Docutils users \& co-developers.
-
\end{quote}
\end{DUclass}
@@ -269,7 +270,6 @@
This is a test document, containing at least one example of each
reStructuredText construct.
-
\end{quote}
\end{DUclass}
@@ -289,7 +289,6 @@
\tableofcontents
-
\section{1~~~Structural Elements%
\label{structural-elements}%
}
@@ -316,19 +315,15 @@
Here’s a transition:
-
%___________________________________________________________________________
\DUtransition
-
It divides the section. Transitions may also occur between sections:
-
%___________________________________________________________________________
\DUtransition
-
\section{2~~~Body Elements%
\label{body-elements}%
}
@@ -385,15 +380,12 @@
}
\begin{itemize}
-
\item A bullet list
\begin{itemize}
-
\item Nested bullet list.
\item Nested item 2.
-
\end{itemize}
\item Item 2.
@@ -401,17 +393,14 @@
Paragraph 2 of item 2.
\begin{itemize}
-
\item Nested bullet list.
\item Nested item 2.
\begin{itemize}
-
\item Third level.
\item Item 2.
-
\end{itemize}
\item Nested item 3.
@@ -420,9 +409,7 @@
%
\phantomsection\label{target}
% Even if this item contains a target and a comment.
-
\end{itemize}
-
\end{itemize}
@@ -431,25 +418,20 @@
}
\begin{enumerate}
-
\item Arabic numerals.
\begin{enumerate}
\renewcommand{\labelenumii}{\alph{enumii})}
-
\item lower alpha)
\begin{enumerate}
\renewcommand{\labelenumiii}{(\roman{enumiii})}
-
\item (lower roman)
\begin{enumerate}
-
\item upper alpha.
\begin{list}{\Roman{enumv})}{\usecounter{enumv}}
-
\item upper roman)
\end{list}
\end{enumerate}
@@ -461,7 +443,6 @@
\begin{enumerate}
\renewcommand{\labelenumii}{\arabic{enumii}.}
\setcounter{enumii}{2}
-
\item Three
\item Four
@@ -470,7 +451,6 @@
\begin{enumerate}
\renewcommand{\labelenumii}{\Alph{enumii}.}
\setcounter{enumii}{2}
-
\item C
\item D
@@ -479,7 +459,6 @@
\begin{enumerate}
\renewcommand{\labelenumii}{\roman{enumii}.}
\setcounter{enumii}{2}
-
\item iii
\item iv
@@ -496,7 +475,6 @@
Definition
\item[{Term}] \leavevmode (\textbf{classifier})
-
Definition paragraph 1.
Definition paragraph 2.
@@ -504,9 +482,7 @@
\item[{Term}] \leavevmode
Definition
-\item[{Term}] \leavevmode (\textbf{classifier one})
-(\textbf{classifier two})
-
+\item[{Term}] \leavevmode (\textbf{classifier one})(\textbf{classifier two})
Definition
\end{description}
@@ -532,7 +508,6 @@
\DUrole{credits}{This paragraph has the \DUroletitlereference{credits} class set. (This is actually not
about credits but just for ensuring that the class attribute
doesn’t get stripped away.)}
-
\end{DUfieldlist}
@@ -543,7 +518,6 @@
For listing command-line options:
\begin{DUoptionlist}
-
\item[-a] command-line option “a”
\item[-b file] options can have arguments
@@ -566,7 +540,6 @@
\item[-1 file, -{}-one=file, -{}-two file] Multiple options with arguments.
\item[/V] DOS/VMS-style options too
-
\end{DUoptionlist}
There must be at least two spaces between the option and the
@@ -642,7 +615,6 @@
Take it away, Eric the Orchestra Leader!
\begin{quote}
-
\begin{DUlineblock}{0em}
\item[] A one, two, a one two three four
\item[]
@@ -673,7 +645,6 @@
\end{DUlineblock}
\item[] Singing…
\end{DUlineblock}
-
\end{quote}
A line block, like the following poem by Christian Morgenstern, can
@@ -704,7 +675,6 @@
Block quotes consist of indented body elements:
\begin{quote}
-
My theory by A. Elk. Brackets Miss, brackets. This theory goes
as follows and begins now. All brontosauruses are thin at one
end, much much thicker in the middle and then thin again at the
@@ -713,7 +683,6 @@
\nopagebreak
\raggedleft —Anne Elk (Miss)
-
\end{quote}
The language of a quote (like any other object) can be specified by
@@ -723,10 +692,8 @@
\begin{selectlanguage}{french}
\begin{quote}
-
ReStructuredText est un langage de balisage léger utilisé
notamment dans la documentation du langage Python.
-
\end{quote}
\end{selectlanguage}
@@ -1190,7 +1157,6 @@
\DUtitle[important]{Important}
\begin{itemize}
-
\item Wash behind your ears.
\item Clean up your room.
@@ -1198,7 +1164,6 @@
\item Call your mother.
\item Back up your data.
-
\end{itemize}
}
@@ -1256,7 +1221,6 @@
\DUtitle[topic]{Topic Title}
This is a topic.
-
\end{quote}
\end{DUclass}
@@ -1312,11 +1276,9 @@
Compound 1, paragraph 2.
\begin{itemize}
-
\item Compound 1, list item one.
\item Compound 1, list item two.
-
\end{itemize}
\end{DUclass}
@@ -1668,7 +1630,6 @@
}
\begin{itemize}
-
\item A role based on an existing role.
\texttt{\DUrole{custom}{one}} \texttt{\DUrole{custom}{two}} \texttt{\DUrole{custom}{three}}
@@ -1705,7 +1666,6 @@
\end{quote}
\DUrole{green}{\DUrole{sc}{\foreignlanguage{british}{British colourful text in small-caps}}}.
-
\end{itemize}
@@ -1848,7 +1808,6 @@
}
\begin{itemize}
-
\item Role names and class arguments are converted to conform to the
regular expression \texttt{{[}a-z{]}{[}-a-z0-9{]}*} (letters are downcased,
accents and similar decoration is stripped, non-conforming
@@ -1884,7 +1843,6 @@
This is a \textsuperscript{\DUrole{custom-superscript}{customized superscript text role}}
This is a \DUroletitlereference{\DUrole{custom-title-reference}{customized title-reference text role}}
-
\end{itemize}
@@ -1908,12 +1866,10 @@
\begin{DUclass}{epigraph}
\begin{quote}
-
Do not play this piece fast. It is never right to play \emph{Ragtime} fast.
\nopagebreak
\raggedleft —Scott Joplin
-
\end{quote}
\end{DUclass}
@@ -1934,13 +1890,11 @@
\begin{DUclass}{enumerateitems}
\begin{itemize}
-
\item item
\item next item
\item third item
-
\end{itemize}
\end{DUclass}
@@ -1948,11 +1902,9 @@
\begin{DUclass}{rules}
\begin{itemize}
-
\item item
\item next item
-
\end{itemize}
\end{DUclass}
@@ -1959,13 +1911,11 @@
A normal bullet list is kept unchanged by the above redefinitions:
\begin{itemize}
-
\item item
\item next item
\item third item
-
\end{itemize}
A container wraps several elements in a common “class wrapper”. Here, we use
@@ -1980,11 +1930,9 @@
paragraph 2
\begin{itemize}
-
\item bullet list
\item still bullet list
-
\end{itemize}
\end{DUclass}
@@ -2114,25 +2062,19 @@
is contained in a quote
\begin{quote}
-
\begin{DUoptionlist}
-
\item[-{}-help] show help
\item[-v] verbose
-
\end{DUoptionlist}
-
\end{quote}
\begin{description}
\item[{In a definition list:}] \leavevmode
\begin{DUoptionlist}
-
\item[-{}-help] show help
\item[-v] verbose
-
\end{DUoptionlist}
\end{description}
@@ -2527,7 +2469,6 @@
\end{longtable*}
\begin{itemize}
-
\item The following line should not be wrapped, because it uses
no-break spaces (\textbackslash{}u00a0):
@@ -2539,7 +2480,6 @@
pdn\-derd\-mdtd\-ri\-schpdn\-derd\-mdtd\-ri\-schpdn\-derd\-mdtd\-ri\-schpdn\-derd\-mdtd\-ri\-schpdn\-derd\-mdtd\-ri\-sch
pdnderdmdtdrischpdnderdmdtdrischpdnderdmdtdrischpdnderdmdtdrischpdnderdmdtdrisch
-
\end{itemize}
@@ -2550,16 +2490,12 @@
The LaTeX Info pages lists under “2.18 Special Characters”
\begin{quote}
-
The following characters play a special role in LaTeX and are called
“special printing characters”, or simply “special characters”.
\begin{quote}
-
\# \$ \% \& \textasciitilde{} \_ \textasciicircum{} \textbackslash{} \{ \}
-
\end{quote}
-
\end{quote}
The special chars verbatim:
@@ -2573,7 +2509,6 @@
However also \emph{square brackets} {[}{]} need special care.
\begin{quote}
-
Commands with optional arguments (e.g. \texttt{\textbackslash{}item}) check
if the token right after the macro name is an opening bracket.
In that case the contents between that bracket and the following
@@ -2582,17 +2517,14 @@
the square brackets aren’t grouping characters themselves, so in
your last example item{[}{[}…{]}{]} the optional argument consists of
{[}… (without the closing bracket).
-
\end{quote}
Compare the items in the following lists:
\begin{itemize}
-
\item simple item
\item {[}bracketed{]} item
-
\end{itemize}
\begin{description}
@@ -2693,7 +2625,6 @@
Handling by the LaTeX writer:
\begin{itemize}
-
\item \texttt{\#}, \texttt{\textbackslash{}} and \texttt{\%} are escaped:
\begin{DUlineblock}{0em}
@@ -2715,11 +2646,9 @@
\item[] \url{A:DOS\\path\\}\DUfootnotemark{id44}{id41}{9}
\end{DUlineblock}
}
-
\end{itemize}
\begin{itemize}
-
\item \textasciicircum{}\textasciicircum{} LaTeX’s special syntax for characters results in “strange” replacements
(both with href and url). A warning is given.
@@ -2742,7 +2671,6 @@
\item[] \url{../st{r}ange{n}ame}
\item[] \url{../{st{r}ange{n}ame}}
\end{DUlineblock}
-
\end{itemize}
Modified: trunk/docutils/test/functional/expected/standalone_rst_xetex.tex
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_xetex.tex 2017-03-12 15:59:37 UTC (rev 8047)
+++ trunk/docutils/test/functional/expected/standalone_rst_xetex.tex 2017-03-13 21:49:17 UTC (rev 8048)
@@ -163,6 +163,11 @@
% subtitle (for topic/sidebar)
\providecommand*{\DUsubtitle}[1]{\par\emph{#1}\smallskip}
+% text mode subscript
+\ifx\textsubscript\undefined
+ \usepackage{fixltx2e} % since 2015 loaded by default
+\fi
+
% title for topics, admonitions, unsupported section levels, and sidebar
\providecommand*{\DUtitle}[2][class-arg]{%
% call \DUtitle#1{#2} if it exists:
@@ -261,7 +266,6 @@
\DUtitle[dedication]{Dedication}
For Docutils users \& co-developers.
-
\end{quote}
\end{DUclass}
@@ -271,7 +275,6 @@
This is a test document, containing at least one example of each
reStructuredText construct.
-
\end{quote}
\end{DUclass}
@@ -291,7 +294,6 @@
\tableofcontents
-
\section{1 Structural Elements%
\label{structural-elements}%
}
@@ -318,19 +320,15 @@
Here’s a transition:
-
%___________________________________________________________________________
\DUtransition
-
It divides the section. Transitions may also occur between sections:
-
%___________________________________________________________________________
\DUtransition
-
\section{2 Body Elements%
\label{body-elements}%
}
@@ -387,15 +385,12 @@
}
\begin{itemize}
-
\item A bullet list
\begin{itemize}
-
\item Nested bullet list.
\item Nested item 2.
-
\end{itemize}
\item Item 2.
@@ -403,17 +398,14 @@
Paragraph 2 of item 2.
\begin{itemize}
-
\item Nested bullet list.
\item Nested item 2.
\begin{itemize}
-
\item Third level.
\item Item 2.
-
\end{itemize}
\item Nested item 3.
@@ -422,9 +414,7 @@
%
\phantomsection\label{target}
% Even if this item contains a target and a comment.
-
\end{itemize}
-
\end{itemize}
@@ -433,25 +423,20 @@
}
\begin{enumerate}
-
\item Arabic numerals.
\begin{enumerate}
\renewcommand{\labelenumii}{\alph{enumii})}
-
\item lower alpha)
\begin{enumerate}
\renewcommand{\labelenumiii}{(\roman{enumiii})}
-
\item (lower roman)
\begin{enumerate}
-
\item upper alpha.
\begin{list}{\Roman{enumv})}{\usecounter{enumv}}
-
\item upper roman)
\end{list}
\end{enumerate}
@@ -463,7 +448,6 @@
\begin{enumerate}
\renewcommand{\labelenumii}{\arabic{enumii}.}
\setcounter{enumii}{2}
-
\item Three
\item Four
@@ -472,7 +456,6 @@
\begin{enumerate}
\renewcommand{\labelenumii}{\Alph{enumii}.}
\setcounter{enumii}{2}
-
\item C
\item D
@@ -481,7 +464,6 @@
\begin{enumerate}
\renewcommand{\labelenumii}{\roman{enumii}.}
\setcounter{enumii}{2}
-
\item iii
\item iv
@@ -498,7 +480,6 @@
Definition
\item[{Term}] \leavevmode (\textbf{classifier})
-
Definition paragraph 1.
Definition paragraph 2.
@@ -506,9 +487,7 @@
\item[{Term}] \leavevmode
Definition
-\item[{Term}] \leavevmode (\textbf{classifier one})
-(\textbf{classifier two})
-
+\item[{Term}] \leavevmode (\textbf{classifier one})(\textbf{classifier two})
Definition
\end{description}
@@ -534,7 +513,6 @@
\DUrole{credits}{This paragraph has the \DUroletitlereference{credits} class set. (This is actually not
about credits but just for ensuring that the class attribute
doesn’t get stripped away.)}
-
\end{DUfieldlist}
@@ -545,7 +523,6 @@
For listing command-line options:
\begin{DUoptionlist}
-
\item[-a] command-line option “a”
\item[-b file] options can have arguments
@@ -568,7 +545,6 @@
\item[-1 file, --one=file, --two file] Multiple options with arguments.
\item[/V] DOS/VMS-style options too
-
\end{DUoptionlist}
There must be at least two spaces between the option and the
@@ -644,7 +620,6 @@
Take it away, Eric the Orchestra Leader!
\begin{quote}
-
\begin{DUlineblock}{0em}
\item[] A one, two, a one two three four
\item[]
@@ -675,7 +650,6 @@
\end{DUlineblock}
\item[] Singing…
\end{DUlineblock}
-
\end{quote}
A line block, like the following poem by Christian Morgenstern, can
@@ -706,7 +680,6 @@
Block quotes consist of indented body elements:
\begin{quote}
-
My theory by A. Elk. Brackets Miss, brackets. This theory goes
as follows and begins now. All brontosauruses are thin at one
end, much much thicker in the middle and then thin again at the
@@ -715,7 +688,6 @@
\nopagebreak
\raggedleft —Anne Elk (Miss)
-
\end{quote}
The language of a quote (like any other object) can be specified by
@@ -725,10 +697,8 @@
\begin{selectlanguage}{french}
\begin{quote}
-
ReStructuredText est un langage de balisage léger utilisé
notamment dans la documentation du langage Python.
-
\end{quote}
\end{selectlanguage}
@@ -1192,7 +1162,6 @@
\DUtitle[important]{Important}
\begin{itemize}
-
\item Wash behind your ears.
\item Clean up your room.
@@ -1200,7 +1169,6 @@
\item Call your mother.
\item Back up your data.
-
\end{itemize}
}
@@ -1258,7 +1226,6 @@
\DUtitle[topic]{Topic Title}
This is a topic.
-
\end{quote}
\end{DUclass}
@@ -1318,11 +1285,9 @@
Compound 1, paragraph 2.
\begin{itemize}
-
\item Compound 1, list item one.
\item Compound 1, list item two.
-
\end{itemize}
\end{DUclass}
@@ -1674,7 +1639,6 @@
}
\begin{itemize}
-
\item A role based on an existing role.
\texttt{\DUrole{custom}{one}} \texttt{\DUrole{custom}{two}} \texttt{\DUrole{custom}{three}}
@@ -1711,7 +1675,6 @@
\end{quote}
\DUrole{green}{\DUrole{sc}{\foreignlanguage{english}{British colourful text in small-caps}}}.
-
\end{itemize}
@@ -1854,7 +1817,6 @@
}
\begin{itemize}
-
\item Role names and class arguments are converted to conform to the
regular expression \texttt{{[}a-z{]}{[}-a-z0-9{]}*} (letters are downcased,
accents and similar decoration is stripped, non-conforming
@@ -1890,7 +1852,6 @@
This is a \textsuperscript{\DUrole{custom-superscript}{customized superscript text role}}
This is a \DUroletitlereference{\DUrole{custom-title-reference}{customized title-reference text role}}
-
\end{itemize}
@@ -1914,12 +1875,10 @@
\begin{DUclass}{epigraph}
\begin{quote}
-
Do not play this piece fast. It is never right to play \emph{Ragtime} fast.
\nopagebreak
\raggedleft —Scott Joplin
-
\end{quote}
\end{DUclass}
@@ -1940,13 +1899,11 @@
\begin{DUclass}{enumerateitems}
\begin{itemize}
-
\item item
\item next item
\item third item
-
\end{itemize}
\end{DUclass}
@@ -1954,11 +1911,9 @@
\begin{DUclass}{rules}
\begin{itemize}
-
\item item
\item next item
-
\end{itemize}
\end{DUclass}
@@ -1965,13 +1920,11 @@
A normal bullet list is kept unchanged by the above redefinitions:
\begin{itemize}
-
\item item
\item next item
\item third item
-
\end{itemize}
A container wraps several elements in a common “class wrapper”. Here, we use
@@ -1986,11 +1939,9 @@
paragraph 2
\begin{itemize}
-
\item bullet list
\item still bullet list
-
\end{itemize}
\end{DUclass}
@@ -2120,25 +2071,19 @@
is contained in a quote
\begin{quote}
-
\begin{DUoptionlist}
-
\item[--help] show help
\item[-v] verbose
-
\end{DUoptionlist}
-
\end{quote}
\begin{description}
\item[{In a definition list:}] \leavevmode
\begin{DUoptionlist}
-
\item[--help] show help
\item[-v] verbose
-
\end{DUoptionlist}
\end{description}
@@ -2533,7 +2478,6 @@
\end{longtable*}
\begin{itemize}
-
\item The following line should not be wrapped, because it uses
no-break spaces (\textbackslash{}u00a0):
@@ -2545,7 ...
[truncated message content] |
|
From: <mi...@us...> - 2017-03-13 21:49:53
|
Revision: 8049
http://sourceforge.net/p/docutils/code/8049
Author: milde
Date: 2017-03-13 21:49:51 +0000 (Mon, 13 Mar 2017)
Log Message:
-----------
LaTeX documentation update.
Modified Paths:
--------------
trunk/docutils/RELEASE-NOTES.txt
trunk/docutils/docs/user/latex.txt
Modified: trunk/docutils/RELEASE-NOTES.txt
===================================================================
--- trunk/docutils/RELEASE-NOTES.txt 2017-03-13 21:49:17 UTC (rev 8048)
+++ trunk/docutils/RELEASE-NOTES.txt 2017-03-13 21:49:51 UTC (rev 8049)
@@ -67,7 +67,8 @@
* docutils/writers/latex2e/__init__.py
- Handle class arguments for block-level elements by wrapping them
- in a "DUclass" environment.
+ in a "DUclass" environment. This replaces the special handling for
+ "epigraph" and "topic" elements.
* tools/
Modified: trunk/docutils/docs/user/latex.txt
===================================================================
--- trunk/docutils/docs/user/latex.txt 2017-03-13 21:49:17 UTC (rev 8048)
+++ trunk/docutils/docs/user/latex.txt 2017-03-13 21:49:51 UTC (rev 8049)
@@ -215,29 +215,35 @@
{\end{enumerate}}%
}
+Notes
+`````
-Class arguments may contain numbers and hyphens, which need special
-treatment in LaTeX command names (see `class directive`_). The commands
-``\begincsname`` and ``\endcsname`` or the special command ``\@namedef`` can
-help with the definition of corresponding macros or environments.
+* Class arguments may contain numbers and hyphens, which need special
+ treatment in LaTeX command names (see `class directive`_). The commands
+ ``\begincsname`` and ``\endcsname`` or the special command ``\@namedef``
+ can help with the definition of corresponding macros or environments.
* Elements can have multiple class arguments. In contrast to HTML/CSS, the
order of the class arguments cannot be ignored in LaTeX
-For "historical reasons", class handling differs for some elements and class
-values:
+* For "historical reasons", class handling differs for some elements and
+ class values:
-* The special macros ``\DUadmonition``, ``\DUtitle``, and ``\DUtopic`` are
- written with a comma separated list of class values as optional argument.
- See the sections on admonitions_, titles_, and the `topic element`_ for
- customization examples.
+ * The special macros ``\DUadmonition``, ``\DUtitle``, and ``\DUtopic`` are
+ written with a comma separated list of class values as optional
+ argument. See http://www.ctan.org/topic/keyval for LaTeX packages that
+ help parsing value lists.
-* Class argument values starting with ``align-`` are transformed to "align"
- argument values. Class argument values starting with ``language-`` set the
- elements language property.
+ See the sections on admonitions_, titles_, and the `topic element`_ for
+ customization examples.
-* The table element recognizes some special class values. See section table_.
+ * Class argument values starting with ``align-`` are transformed to
+ "align" argument values. Class argument values starting with
+ ``language-`` set the element's language property.
+ * The table element recognizes some special class values. See section
+ table_.
+
.. _"classes" attribute: ../ref/doctree.html#classes
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-03-19 23:04:52
|
Revision: 8050
http://sourceforge.net/p/docutils/code/8050
Author: milde
Date: 2017-03-19 23:04:50 +0000 (Sun, 19 Mar 2017)
Log Message:
-----------
Fix [ 313 ] differentiate apostrophe from single quote (if possible).
Mind, that this is not possible for apostrophe at end of words.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/utils/smartquotes.py
trunk/docutils/test/test_transforms/test_smartquotes.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2017-03-13 21:49:51 UTC (rev 8049)
+++ trunk/docutils/HISTORY.txt 2017-03-19 23:04:50 UTC (rev 8050)
@@ -58,6 +58,8 @@
- Update quote definitions for languages et, fi, ro, sv, tr, uk.
- New quote definitions for hr, hsb, hu, lv, sl.
+ - Fix [ 313 ] Differentiate apostrophe from closing single quote
+ (if possible).
* docutils/writers/_html_base.py
Modified: trunk/docutils/docutils/utils/smartquotes.py
===================================================================
--- trunk/docutils/docutils/utils/smartquotes.py 2017-03-13 21:49:51 UTC (rev 8049)
+++ trunk/docutils/docutils/utils/smartquotes.py 2017-03-19 23:04:50 UTC (rev 8050)
@@ -160,21 +160,21 @@
Backslash Escapes
=================
-If you need to use literal straight quotes (or plain hyphens and
-periods), SmartyPants accepts the following backslash escape sequences
-to force non-smart punctuation. It does so by transforming the escape
-sequence into a character:
+If you need to use literal straight quotes (or plain hyphens and periods),
+`smartquotes` accepts the following backslash escape sequences to force
+ASCII-punctuation. Note that you need two backslashes, because Docutils
+expands backslash escapes, too.
-======== ===== =========
-Escape Value Character
-======== ===== =========
-``\\\\`` \ \\
-\\" " "
-\\' ' '
-\\. . .
-\\- - \-
-\\` ` \`
-======== ===== =========
+======== =========
+Escape Character
+======== =========
+``\\`` \\
+``\\"`` \\"
+``\\'`` \\'
+``\\.`` \\.
+``\\-`` \\-
+``\\``` \\`
+======== =========
This is useful, for example, when you want to use straight quotes as
foot and inch marks: 6\\'2\\" tall; a 17\\" iMac.
@@ -274,7 +274,7 @@
continue not caring. Using straight quotes -- and sticking to the 7-bit
ASCII character set in general -- is certainly a simpler way to live.
-Even if you I *do* care about accurate typography, you still might want to
+Even if you *do* care about accurate typography, you still might want to
think twice before educating the quote characters in your weblog. One side
effect of publishing curly quote characters is that it makes your
weblog a bit harder for others to quote from using copy-and-paste. What
@@ -305,16 +305,45 @@
``'Twas the night before Christmas.``
In the case above, SmartyPants will turn the apostrophe into an opening
-single-quote, when in fact it should be a closing one. I don't think
-this problem can be solved in the general case -- every word processor
-I've tried gets this wrong as well. In such cases, it's best to use the
-proper character for closing single-quotes (``’``) by hand.
+single-quote, when in fact it should be the `right single quotation mark`
+character which is also "the preferred character to use for apostrophe"
+(Unicode). I don't think this problem can be solved in the general case --
+every word processor I've tried gets this wrong as well. In such cases, it's
+best to use the proper character for closing single-quotes (’) by hand.
+In English, the same character is used for apostrophe and closing single
+quote (both plain and "smart" ones). For other locales (French, Italian,
+Swiss, ...) "smart" single closing quotes differ from the curly apostrophe.
+ .. class:: language-fr
+
+ Il dit : "C'est 'super' !"
+
+If the apostrophe is used at the end of a word, it cannot be distinguished
+from a single quote by the algorithm. Therefore, a text like::
+
+ .. class:: language-de-CH
+
+ "Er sagt: 'Ich fass' es nicht.'"
+
+will get a single closing guillemet instead of an apostrophe.
+
+This can be prevented by use of the curly apostrophe character (’) in
+the source:
+
+ .. class:: language-de-CH
+
+ "Er sagt: 'Ich fass' es nicht.'" → "Er sagt: 'Ich fass’ es nicht.'"
+
+
Version History
===============
-1.7 2012-11-19
+1.7.1: 2017-03-19
+ - Update and extend language-dependent quotes.
+ - Differentiate apostrophe from single quote.
+
+1.7: 2012-11-19
- Internationalization: language-dependent quotes.
1.6.1: 2012-11-06
@@ -370,6 +399,7 @@
endash = u'–' # "–" EN DASH
emdash = u'—' # "—" EM DASH
ellipsis = u'…' # "…" HORIZONTAL ELLIPSIS
+ apostrophe = u'’'
# quote characters (language-specific, set in __init__())
#
@@ -395,10 +425,10 @@
'da-x-altquot': u'„“‚‘',
'de': u'„“‚‘',
'de-x-altquot': u'»«›‹',
- 'de-CH': u'«»‹›',
+ 'de-ch': u'«»‹›',
'el': u'«»“”',
'en': u'“”‘’',
- 'en-UK': u'‘’“”',
+ 'en-uk': u'‘’“”',
'eo': u'“”‘’',
'es': u'«»“”',
'es-x-altquot': u'“”‘’',
@@ -410,7 +440,7 @@
'fr': (u'« ', u' »', u'‹ ', u' ›'), # with narrow no-break space
'fr-x-altquot': u'«»‹›', # for use with manually set spaces
# 'fr-x-altquot2': (u'“ ', u' ”', u'‘ ', u' ’'), # rarely used
- 'fr-CH': u'«»‹›',
+ 'fr-ch': u'«»‹›',
'gl': u'«»“”',
'he': u'”“»«',
'he-x-altquot': u'„”‚’',
@@ -420,7 +450,7 @@
'hsb-x-altquot':u'»«›‹',
'hu': u'„”«»',
'it': u'«»“”',
- 'it-CH': u'«»‹›',
+ 'it-ch': u'«»‹›',
'it-x-altquot': u'“”‘’',
# 'it-x-altquot2': u'“„‘‚', # antiquated?
'ja': u'「」『』',
@@ -432,7 +462,7 @@
'pl': u'„”«»',
'pl-x-altquot': u'«»“”',
'pt': u'«»“”',
- 'pt-BR': u'“”‘’',
+ 'pt-br': u'“”‘’',
'ro': u'„”«»',
'ru': u'«»„“',
'sk': u'„“‚‘',
@@ -446,8 +476,8 @@
# 'tr-x-altquot2': u'“„‘‚', # antiquated?
'uk': u'«»„“',
'uk-x-altquot': u'„“‚‘',
- 'zh-CN': u'“”‘’',
- 'zh-TW': u'「」『』',
+ 'zh-cn': u'“”‘’',
+ 'zh-tw': u'「」『』',
}
def __init__(self, language='en'):
@@ -454,7 +484,7 @@
self.language = language
try:
(self.opquote, self.cpquote,
- self.osquote, self.csquote) = self.quotes[language]
+ self.osquote, self.csquote) = self.quotes[language.lower()]
except KeyError:
self.opquote, self.cpquote, self.osquote, self.csquote = u'""\'\''
@@ -624,14 +654,22 @@
)
' # the quote
(?=\w) # followed by a word character
- """ % (dec_dashes,), re.VERBOSE)
+ """ % (dec_dashes,), re.VERBOSE | re.UNICODE)
text = opening_single_quotes_regex.sub(r'\1'+smart.osquote, text)
+ # In many locales, single closing quotes are different from apostrophe:
+ if smart.csquote != smart.apostrophe:
+ apostrophe_regex = re.compile(r"(?<=(\w|\d))'(?=\w)", re.UNICODE)
+ text = apostrophe_regex.sub(smart.apostrophe, text)
+
closing_single_quotes_regex = re.compile(r"""
(%s)
'
- (?!\s | s\b | \d)
- """ % (close_class,), re.VERBOSE)
+ (?!\s | # whitespace
+ s\b |
+ \d # digits ('80s)
+ )
+ """ % (close_class,), re.VERBOSE | re.UNICODE)
text = closing_single_quotes_regex.sub(r'\1'+smart.csquote, text)
closing_single_quotes_regex = re.compile(r"""
@@ -638,7 +676,7 @@
(%s)
'
(\s | s\b)
- """ % (close_class,), re.VERBOSE)
+ """ % (close_class,), re.VERBOSE | re.UNICODE)
text = closing_single_quotes_regex.sub(r'\1%s\2' % smart.csquote, text)
# Any remaining single quotes should be opening ones:
@@ -879,7 +917,7 @@
pass
from docutils.core import publish_string
- docstring_html = publish_string(__doc__, writer_name='html')
+ docstring_html = publish_string(__doc__, writer_name='html5')
print docstring_html
@@ -912,11 +950,3 @@
self.assertEqual(sp(text), text)
unittest.main()
-
-
-
-
-__author__ = "Chad Miller <sma...@ch...>"
-__version__ = "1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400"
-__url__ = "http://wiki.chad.org/SmartyPantsPy"
-__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom"
Modified: trunk/docutils/test/test_transforms/test_smartquotes.py
===================================================================
--- trunk/docutils/test/test_transforms/test_smartquotes.py 2017-03-13 21:49:51 UTC (rev 8049)
+++ trunk/docutils/test/test_transforms/test_smartquotes.py 2017-03-19 23:04:50 UTC (rev 8050)
@@ -41,7 +41,7 @@
totest['transitions'] = ((SmartQuotes,), [
["""\
-Test "smart quotes", 'single smart quotes',
+Test "smart quotes", 'secondary smart quotes',
"'nested' smart" quotes
-- and ---also long--- dashes.
""",
@@ -48,11 +48,11 @@
u"""\
<document source="test data">
<paragraph>
- Test “smart quotes”, ‘single smart quotes’,
+ Test “smart quotes”, ‘secondary smart quotes’,
“‘nested’ smart” quotes
– and —also long— dashes.
"""],
-[r"""Escaped \\"smart quotes\\", \\'single smart quotes\\',
+[r"""Escaped \\"smart quotes\\", \\'secondary smart quotes\\',
\\"\\'nested\\' smart\\" quotes
\\-- and -\\--also long-\\-- dashes.
""",
@@ -59,7 +59,7 @@
u"""\
<document source="test data">
<paragraph>
- Escaped "smart quotes", 'single smart quotes',
+ Escaped "smart quotes", 'secondary smart quotes',
"'nested' smart" quotes
-- and ---also long--- dashes.
"""],
@@ -155,8 +155,12 @@
["""\
.. class:: language-de
-German "smart quotes" and 'single smart quotes'.
+German "smart quotes" and 'secondary smart quotes'.
+.. class:: language-en-UK
+
+British "quotes" use single and 'secondary quotes' double quote signs.
+
.. class:: language-foo
"Quoting style" for unknown languages is 'ASCII'.
@@ -163,17 +167,19 @@
.. class:: language-de-x-altquot
-Alternative German "smart quotes" and 'single smart quotes'.
+Alternative German "smart quotes" and 'secondary smart quotes'.
""",
u"""\
<document source="test data">
<paragraph classes="language-de">
- German „smart quotes“ and ‚single smart quotes‘.
+ German „smart quotes“ and ‚secondary smart quotes‘.
+ <paragraph classes="language-en-uk">
+ British ‘quotes’ use single and “secondary quotes” double quote signs.
<paragraph classes="language-foo">
"Quoting style" for unknown languages is 'ASCII'.
<paragraph classes="language-de-x-altquot">
- Alternative German »smart quotes« and ›single smart quotes‹.
- <system_message level="2" line="7" source="test data" type="WARNING">
+ Alternative German »smart quotes« and ›secondary smart quotes‹.
+ <system_message level="2" line="11" source="test data" type="WARNING">
<paragraph>
No smart quotes defined for language "foo".
"""],
@@ -181,28 +187,31 @@
totest_de['transitions'] = ((SmartQuotes,), [
["""\
-German "smart quotes" and 'single smart quotes'.
+German "smart quotes" and 'secondary smart quotes'.
-.. class:: language-en-UK
+.. class:: language-en
-English "smart quotes" and 'single smart quotes'.
+English "smart quotes" and 'secondary smart quotes'.
""",
u"""\
<document source="test data">
<paragraph>
- German „smart quotes“ and ‚single smart quotes‘.
- <paragraph classes="language-en-uk">
- English “smart quotes” and ‘single smart quotes’.
+ German „smart quotes“ and ‚secondary smart quotes‘.
+ <paragraph classes="language-en">
+ English “smart quotes” and ‘secondary smart quotes’.
"""],
])
totest_de_alt['transitions'] = ((SmartQuotes,), [
["""\
-Alternative German "smart quotes" and 'single smart quotes'.
+Alternative German "smart quotes" and 'secondary smart quotes'.
+In this case, the apostrophe isn't a closing secondary quote!
+
.. class:: language-en-UK
-English "smart quotes" and 'single smart quotes' have no alternative.
+British "quotes" use single and 'secondary quotes' double quote signs
+(there are no alternative quotes defined).
.. class:: language-ro
@@ -211,9 +220,12 @@
u"""\
<document source="test data">
<paragraph>
- Alternative German »smart quotes« and ›single smart quotes‹.
+ Alternative German »smart quotes« and ›secondary smart quotes‹.
+ <paragraph>
+ In this case, the apostrophe isn’t a closing secondary quote!
<paragraph classes="language-en-uk">
- English “smart quotes” and ‘single smart quotes’ have no alternative.
+ British ‘quotes’ use single and “secondary quotes” double quote signs
+ (there are no alternative quotes defined).
<paragraph classes="language-ro">
Romanian „smart quotes” and «secondary» smart quotes.
"""],
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-03-22 14:29:04
|
Revision: 8052
http://sourceforge.net/p/docutils/code/8052
Author: milde
Date: 2017-03-22 14:29:01 +0000 (Wed, 22 Mar 2017)
Log Message:
-----------
Improve and test "compound" handling in LaTeX.
Test samples added to standard.txt allow inspection of the outcome of
compound handling for all output formats.
Modified Paths:
--------------
trunk/docutils/docs/dev/todo.txt
trunk/docutils/docutils/writers/latex2e/__init__.py
trunk/docutils/test/functional/expected/standalone_rst_docutils_xml.xml
trunk/docutils/test/functional/expected/standalone_rst_html4css1.html
trunk/docutils/test/functional/expected/standalone_rst_html5.html
trunk/docutils/test/functional/expected/standalone_rst_latex.tex
trunk/docutils/test/functional/expected/standalone_rst_pseudoxml.txt
trunk/docutils/test/functional/expected/standalone_rst_xetex.tex
trunk/docutils/test/functional/input/data/standard.txt
Modified: trunk/docutils/docs/dev/todo.txt
===================================================================
--- trunk/docutils/docs/dev/todo.txt 2017-03-21 15:45:28 UTC (rev 8051)
+++ trunk/docutils/docs/dev/todo.txt 2017-03-22 14:29:01 UTC (rev 8052)
@@ -2349,6 +2349,14 @@
* File names of included graphics (see also `grffile` package).
+* Paragraph following field-list or table in compound is indented.
+
+ This is a problem with the current DUfieldlist definition and with
+ the use of "longtable" for tables.
+ See `LaTeX constructs and packages instead of re-implementations`_ for
+ alternatives.
+
+
Generate clean and configurable LaTeX source
----------------------------------------------
@@ -2498,7 +2506,8 @@
Overriding:
- * continue if the `compound paragraph`_ directive is used, or
+ * continue if the `compound paragraph`_ directive is used (as currently),
+ or
* force a new paragraph with an empty comment.
* Sidebar handling (environment with `framed`, `marginnote`, `wrapfig`,
@@ -2552,7 +2561,6 @@
* Add more classes or options, e.g. for
- + column width set by latex,
+ horizontal alignment and rules.
+ long table vs. tabular (see next item).
Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py 2017-03-21 15:45:28 UTC (rev 8051)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py 2017-03-22 14:29:01 UTC (rev 8052)
@@ -1595,7 +1595,8 @@
def duclass_open(self, node):
"""Open a group and insert declarations for class values."""
- self.out.append('\n')
+ if not isinstance(node.parent, nodes.compound):
+ self.out.append('\n')
for cls in node['classes']:
if cls.startswith('language-'):
language = self.babel.language_name(cls[9:])
@@ -1808,8 +1809,10 @@
pass
def visit_comment(self, node):
+ if not isinstance(node.parent, nodes.compound):
+ self.out.append('\n')
# Precede every line with a comment sign, wrap in newlines
- self.out.append('\n%% %s\n' % node.astext().replace('\n', '\n% '))
+ self.out.append('%% %s\n' % node.astext().replace('\n', '\n% '))
raise nodes.SkipNode
def depart_comment(self, node):
@@ -1816,10 +1819,10 @@
pass
def visit_compound(self, node):
+ if isinstance(node.parent, nodes.compound):
+ self.out.append('\n')
+ node['classes'].insert(0, 'compound')
self.duclass_open(node)
- # TODO: remove/comment blank lines in content
- # so that included lists, equations, figures, ...
- # become part of the compound paragraph.
def depart_compound(self, node):
self.duclass_close(node)
@@ -1859,7 +1862,7 @@
pass
def depart_definition(self, node):
- self.out.append('\n')
+ self.out.append('\n') # TODO: just pass?
def visit_definition_list(self, node):
self.duclass_open(node)
@@ -2382,8 +2385,10 @@
include_graphics_options.append('width=%s' %
self.to_latex_length(attrs['width']))
if not (self.is_inline(node) or
+ isinstance(node.parent, (nodes.figure, nodes.compound))):
+ pre.append('\n')
+ if not (self.is_inline(node) or
isinstance(node.parent, nodes.figure)):
- pre.append('\n')
post.append('\n')
pre.reverse()
self.out.extend(pre)
@@ -2653,12 +2658,12 @@
def visit_paragraph(self, node):
# insert blank line, unless
- # * the paragraph is first in a list item,
+ # * the paragraph is first in a list item or compound,
# * follows a non-paragraph node in a compound,
# * is in a table with auto-width columns
index = node.parent.index(node)
- if (index == 0 and (isinstance(node.parent, nodes.list_item) or
- isinstance(node.parent, nodes.description))):
+ if index == 0 and isinstance(node.parent,
+ (nodes.list_item, nodes.description, nodes.compound)):
pass
elif (index > 0 and isinstance(node.parent, nodes.compound) and
not isinstance(node.parent[index - 1], nodes.paragraph) and
Modified: trunk/docutils/test/functional/expected/standalone_rst_docutils_xml.xml
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_docutils_xml.xml 2017-03-21 15:45:28 UTC (rev 8051)
+++ trunk/docutils/test/functional/expected/standalone_rst_docutils_xml.xml 2017-03-22 14:29:01 UTC (rev 8052)
@@ -1170,31 +1170,37 @@
</section>
<section ids="compound-paragraph" names="compound\ paragraph">
<title auto="1" refid="id80"><generated classes="sectnum">2.14.7 </generated>Compound Paragraph</title>
+ <paragraph>The <emphasis>compound</emphasis> directive is used to create a "compound paragraph", which
+ is a single logical paragraph containing multiple physical body
+ elements. For example:</paragraph>
+ <compound>
+ <paragraph>The 'rm' command is very dangerous. If you are logged
+ in as root and enter</paragraph>
+ <literal_block xml:space="preserve">cd /
+rm -rf *</literal_block>
+ <paragraph>you will erase the entire contents of your file system.</paragraph>
+ </compound>
+ <paragraph>Test the handling and display of compound paragraphs:</paragraph>
<compound classes="some-class">
- <paragraph>Compound 1, paragraph 1.</paragraph>
- <paragraph>Compound 1, paragraph 2.</paragraph>
+ <paragraph>Compound 2, paragraph 1,</paragraph>
+ <paragraph>compound 2, paragraph 2,</paragraph>
<bullet_list bullet="*">
<list_item>
- <paragraph>Compound 1, list item one.</paragraph>
+ <paragraph>list item 1,</paragraph>
</list_item>
<list_item>
- <paragraph>Compound 1, list item two.</paragraph>
+ <paragraph>list item 2,</paragraph>
</list_item>
</bullet_list>
+ <paragraph>compound 2, paragraph 3.</paragraph>
</compound>
- <paragraph>Another compound statement:</paragraph>
<compound>
- <paragraph>Compound 2, a literal block:</paragraph>
- <literal_block xml:space="preserve">Compound 2, literal.</literal_block>
- <paragraph>Compound 2, this is a test.</paragraph>
- </compound>
- <compound>
<paragraph>Compound 3, only consisting of one paragraph.</paragraph>
</compound>
<compound>
<literal_block xml:space="preserve">Compound 4.
This one starts with a literal block.</literal_block>
- <paragraph>Compound 4, a paragraph.</paragraph>
+ <paragraph>Compound 4, paragraph following the literal block.</paragraph>
</compound>
<paragraph>Now something <emphasis>really</emphasis> perverted -- a nested compound block. This is
just to test that it works at all; the results don't have to be
@@ -1202,13 +1208,14 @@
<compound>
<paragraph>Compound 5, block 1 (a paragraph).</paragraph>
<compound>
- <paragraph>Compound 6, block 2 in compound 5.</paragraph>
+ <paragraph>Compound 6 is block 2 in compound 5.</paragraph>
<paragraph>Compound 6, another paragraph.</paragraph>
</compound>
<paragraph>Compound 5, block 3 (a paragraph).</paragraph>
</compound>
<compound>
- <paragraph>Compound 7, with a table inside:</paragraph>
+ <paragraph>Compound 7, tests the inclusion of various block-level
+ elements in one logical paragraph. First a table,</paragraph>
<table>
<tgroup cols="3">
<colspec colwidth="20"></colspec>
@@ -1237,8 +1244,62 @@
</tbody>
</tgroup>
</table>
- <paragraph>Compound 7, a paragraph after the table.</paragraph>
- <paragraph>Compound 7, another paragraph.</paragraph>
+ <paragraph>followed by a paragraph. This physical paragraph is
+ actually a continuation of the paragraph before the table. It is followed
+ by</paragraph>
+ <block_quote>
+ <paragraph>a quote and</paragraph>
+ </block_quote>
+ <enumerated_list enumtype="arabic" prefix="" suffix=".">
+ <list_item>
+ <paragraph>an enumerated list,</paragraph>
+ </list_item>
+ </enumerated_list>
+ <paragraph>a paragraph,</paragraph>
+ <option_list>
+ <option_list_item>
+ <option_group>
+ <option>
+ <option_string>--an</option_string>
+ </option>
+ </option_group>
+ <description>
+ <paragraph>option list,</paragraph>
+ </description>
+ </option_list_item>
+ </option_list>
+ <paragraph>a paragraph,</paragraph>
+ <field_list>
+ <field>
+ <field_name>a field</field_name>
+ <field_body>
+ <paragraph>list,</paragraph>
+ </field_body>
+ </field>
+ </field_list>
+ <paragraph>a paragraph,</paragraph>
+ <definition_list>
+ <definition_list_item>
+ <term>a definition</term>
+ <definition>
+ <paragraph>list,</paragraph>
+ </definition>
+ </definition_list_item>
+ </definition_list>
+ <paragraph>a paragraph, an image:</paragraph>
+ <image uri="../../../docs/user/rst/images/biohazard.png"></image>
+ <paragraph>a paragraph,</paragraph>
+ <line_block>
+ <line>a line</line>
+ <line>block,</line>
+ </line_block>
+ <paragraph>a paragraph followed by a comment,</paragraph>
+ <comment xml:space="preserve">this is a comment</comment>
+ <paragraph>a paragraph, a</paragraph>
+ <note>
+ <paragraph>with content</paragraph>
+ </note>
+ <paragraph>and the final paragraph of the compound 7.</paragraph>
</compound>
</section>
<section ids="parsed-literal-blocks" names="parsed\ literal\ blocks">
@@ -1684,7 +1745,7 @@
<system_message level="1" line="685" source="functional/input/data/standard.txt" type="INFO">
<paragraph>Hyperlink target "docutils" is not referenced.</paragraph>
</system_message>
- <system_message level="1" line="806" source="functional/input/data/standard.txt" type="INFO">
+ <system_message level="1" line="851" source="functional/input/data/standard.txt" type="INFO">
<paragraph>Hyperlink target "hyperlink targets" is not referenced.</paragraph>
</system_message>
</section>
Modified: trunk/docutils/test/functional/expected/standalone_rst_html4css1.html
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_html4css1.html 2017-03-21 15:45:28 UTC (rev 8051)
+++ trunk/docutils/test/functional/expected/standalone_rst_html4css1.html 2017-03-22 14:29:01 UTC (rev 8052)
@@ -854,22 +854,28 @@
</div>
<div class="section" id="compound-paragraph">
<h3><a class="toc-backref" href="#id83">2.14.7 Compound Paragraph</a></h3>
-<div class="some-class compound">
-<p class="compound-first">Compound 1, paragraph 1.</p>
-<p class="compound-middle">Compound 1, paragraph 2.</p>
-<ul class="compound-last simple">
-<li>Compound 1, list item one.</li>
-<li>Compound 1, list item two.</li>
-</ul>
-</div>
-<p>Another compound statement:</p>
+<p>The <em>compound</em> directive is used to create a "compound paragraph", which
+is a single logical paragraph containing multiple physical body
+elements. For example:</p>
<div class="compound">
-<p class="compound-first">Compound 2, a literal block:</p>
+<p class="compound-first">The 'rm' command is very dangerous. If you are logged
+in as root and enter</p>
<pre class="compound-middle literal-block">
-Compound 2, literal.
+cd /
+rm -rf *
</pre>
-<p class="compound-last">Compound 2, this is a test.</p>
+<p class="compound-last">you will erase the entire contents of your file system.</p>
</div>
+<p>Test the handling and display of compound paragraphs:</p>
+<div class="some-class compound">
+<p class="compound-first">Compound 2, paragraph 1,</p>
+<p class="compound-middle">compound 2, paragraph 2,</p>
+<ul class="compound-middle simple">
+<li>list item 1,</li>
+<li>list item 2,</li>
+</ul>
+<p class="compound-last">compound 2, paragraph 3.</p>
+</div>
<div class="compound">
<p>Compound 3, only consisting of one paragraph.</p>
</div>
@@ -878,7 +884,7 @@
Compound 4.
This one starts with a literal block.
</pre>
-<p class="compound-last">Compound 4, a paragraph.</p>
+<p class="compound-last">Compound 4, paragraph following the literal block.</p>
</div>
<p>Now something <em>really</em> perverted -- a nested compound block. This is
just to test that it works at all; the results don't have to be
@@ -886,13 +892,14 @@
<div class="compound">
<p class="compound-first">Compound 5, block 1 (a paragraph).</p>
<div class="compound-middle compound">
-<p class="compound-first">Compound 6, block 2 in compound 5.</p>
+<p class="compound-first">Compound 6 is block 2 in compound 5.</p>
<p class="compound-last">Compound 6, another paragraph.</p>
</div>
<p class="compound-last">Compound 5, block 3 (a paragraph).</p>
</div>
<div class="compound">
-<p class="compound-first">Compound 7, with a table inside:</p>
+<p class="compound-first">Compound 7, tests the inclusion of various block-level
+elements in one logical paragraph. First a table,</p>
<table border="1" class="compound-middle docutils">
<colgroup>
<col width="33%" />
@@ -916,10 +923,55 @@
</tr>
</tbody>
</table>
-<p class="compound-middle">Compound 7, a paragraph after the table.</p>
-<p class="compound-last">Compound 7, another paragraph.</p>
+<p class="compound-middle">followed by a paragraph. This physical paragraph is
+actually a continuation of the paragraph before the table. It is followed
+by</p>
+<blockquote class="compound-middle">
+a quote and</blockquote>
+<ol class="compound-middle arabic simple">
+<li>an enumerated list,</li>
+</ol>
+<p class="compound-middle">a paragraph,</p>
+<table class="compound-middle docutils option-list" frame="void" rules="none">
+<col class="option" />
+<col class="description" />
+<tbody valign="top">
+<tr><td class="option-group">
+<kbd><span class="option">--an</span></kbd></td>
+<td>option list,</td></tr>
+</tbody>
+</table>
+<p class="compound-middle">a paragraph,</p>
+<table class="compound-middle docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">a field:</th><td class="field-body">list,</td>
+</tr>
+</tbody>
+</table>
+<p class="compound-middle">a paragraph,</p>
+<dl class="compound-middle docutils">
+<dt>a definition</dt>
+<dd>list,</dd>
+</dl>
+<p class="compound-middle">a paragraph, an image:</p>
+<img alt="../../../docs/user/rst/images/biohazard.png" class="compound-middle" src="../../../docs/user/rst/images/biohazard.png" />
+<p class="compound-middle">a paragraph,</p>
+<div class="compound-middle line-block">
+<div class="line">a line</div>
+<div class="line">block,</div>
</div>
+<p class="compound-middle">a paragraph followed by a comment,</p>
+<!-- this is a comment -->
+<p class="compound-middle">a paragraph, a</p>
+<div class="admonition note compound-middle">
+<p class="first admonition-title">Note</p>
+<p class="last">with content</p>
</div>
+<p class="compound-last">and the final paragraph of the compound 7.</p>
+</div>
+</div>
<div class="section" id="parsed-literal-blocks">
<h3><a class="toc-backref" href="#id84">2.14.8 Parsed Literal Blocks</a></h3>
<pre class="literal-block">
Modified: trunk/docutils/test/functional/expected/standalone_rst_html5.html
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_html5.html 2017-03-21 15:45:28 UTC (rev 8051)
+++ trunk/docutils/test/functional/expected/standalone_rst_html5.html 2017-03-22 14:29:01 UTC (rev 8052)
@@ -835,27 +835,33 @@
</div>
<div class="section" id="compound-paragraph">
<h3><a class="toc-backref" href="#id93"><span class="sectnum">2.14.7</span> Compound Paragraph</a></h3>
+<p>The <em>compound</em> directive is used to create a "compound paragraph", which
+is a single logical paragraph containing multiple physical body
+elements. For example:</p>
+<div class="compound">
+<p class="compound-first">The 'rm' command is very dangerous. If you are logged
+in as root and enter</p>
+<pre class="compound-middle literal-block">cd /
+rm -rf *</pre>
+<p class="compound-last">you will erase the entire contents of your file system.</p>
+</div>
+<p>Test the handling and display of compound paragraphs:</p>
<div class="some-class compound">
-<p class="compound-first">Compound 1, paragraph 1.</p>
-<p class="compound-middle">Compound 1, paragraph 2.</p>
-<ul class="compound-last simple">
-<li><p>Compound 1, list item one.</p></li>
-<li><p>Compound 1, list item two.</p></li>
+<p class="compound-first">Compound 2, paragraph 1,</p>
+<p class="compound-middle">compound 2, paragraph 2,</p>
+<ul class="compound-middle simple">
+<li><p>list item 1,</p></li>
+<li><p>list item 2,</p></li>
</ul>
+<p class="compound-last">compound 2, paragraph 3.</p>
</div>
-<p>Another compound statement:</p>
<div class="compound">
-<p class="compound-first">Compound 2, a literal block:</p>
-<pre class="compound-middle literal-block">Compound 2, literal.</pre>
-<p class="compound-last">Compound 2, this is a test.</p>
-</div>
-<div class="compound">
<p>Compound 3, only consisting of one paragraph.</p>
</div>
<div class="compound">
<pre class="compound-first literal-block">Compound 4.
This one starts with a literal block.</pre>
-<p class="compound-last">Compound 4, a paragraph.</p>
+<p class="compound-last">Compound 4, paragraph following the literal block.</p>
</div>
<p>Now something <em>really</em> perverted -- a nested compound block. This is
just to test that it works at all; the results don't have to be
@@ -863,13 +869,14 @@
<div class="compound">
<p class="compound-first">Compound 5, block 1 (a paragraph).</p>
<div class="compound-middle compound">
-<p class="compound-first">Compound 6, block 2 in compound 5.</p>
+<p class="compound-first">Compound 6 is block 2 in compound 5.</p>
<p class="compound-last">Compound 6, another paragraph.</p>
</div>
<p class="compound-last">Compound 5, block 3 (a paragraph).</p>
</div>
<div class="compound">
-<p class="compound-first">Compound 7, with a table inside:</p>
+<p class="compound-first">Compound 7, tests the inclusion of various block-level
+elements in one logical paragraph. First a table,</p>
<table class="compound-middle">
<colgroup>
<col style="width: 33%" />
@@ -893,10 +900,50 @@
</tr>
</tbody>
</table>
-<p class="compound-middle">Compound 7, a paragraph after the table.</p>
-<p class="compound-last">Compound 7, another paragraph.</p>
+<p class="compound-middle">followed by a paragraph. This physical paragraph is
+actually a continuation of the paragraph before the table. It is followed
+by</p>
+<blockquote class="compound-middle">
+<p>a quote and</p>
+</blockquote>
+<ol class="compound-middle arabic simple">
+<li><p>an enumerated list,</p></li>
+</ol>
+<p class="compound-middle">a paragraph,</p>
+<dl class="compound-middle option-list">
+<dt><kbd><span class="option">--an</span></kbd></dt>
+<dd><p>option list,</p>
+</dd>
+</dl>
+<p class="compound-middle">a paragraph,</p>
+<dl class="compound-middle field-list simple">
+<dt>a field</dt>
+<dd><p>list,</p>
+</dd>
+</dl>
+<p class="compound-middle">a paragraph,</p>
+<dl class="compound-middle simple">
+<dt>a definition</dt>
+<dd><p>list,</p>
+</dd>
+</dl>
+<p class="compound-middle">a paragraph, an image:</p>
+<img alt="../../../docs/user/rst/images/biohazard.png" class="compound-middle" src="../../../docs/user/rst/images/biohazard.png" />
+<p class="compound-middle">a paragraph,</p>
+<div class="compound-middle line-block">
+<div class="line">a line</div>
+<div class="line">block,</div>
</div>
+<p class="compound-middle">a paragraph followed by a comment,</p>
+<!-- this is a comment -->
+<p class="compound-middle">a paragraph, a</p>
+<div class="admonition note compound-middle">
+<p class="admonition-title">Note</p>
+<p>with content</p>
</div>
+<p class="compound-last">and the final paragraph of the compound 7.</p>
+</div>
+</div>
<div class="section" id="parsed-literal-blocks">
<h3><a class="toc-backref" href="#id94"><span class="sectnum">2.14.8</span> Parsed Literal Blocks</a></h3>
<pre class="literal-block">This is a parsed literal block.
Modified: trunk/docutils/test/functional/expected/standalone_rst_latex.tex
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_latex.tex 2017-03-21 15:45:28 UTC (rev 8051)
+++ trunk/docutils/test/functional/expected/standalone_rst_latex.tex 2017-03-22 14:29:01 UTC (rev 8052)
@@ -1269,35 +1269,43 @@
\label{compound-paragraph}%
}
-\begin{DUclass}{some-class}
+The \emph{compound} directive is used to create a “compound paragraph”, which
+is a single logical paragraph containing multiple physical body
+elements. For example:
-Compound 1, paragraph 1.
+\begin{DUclass}{compound}
+The ‘rm’ command is very dangerous. If you are logged
+in as root and enter
+\begin{quote}
+\begin{alltt}
+cd /
+rm -rf *
+\end{alltt}
+\end{quote}
+you will erase the entire contents of your file system.
+\end{DUclass}
-Compound 1, paragraph 2.
+Test the handling and display of compound paragraphs:
+\begin{DUclass}{compound}
+\begin{DUclass}{some-class}
+Compound 2, paragraph 1,
+
+compound 2, paragraph 2,
\begin{itemize}
-\item Compound 1, list item one.
+\item list item 1,
-\item Compound 1, list item two.
+\item list item 2,
\end{itemize}
+compound 2, paragraph 3.
\end{DUclass}
+\end{DUclass}
-Another compound statement:
-
-
-Compound 2, a literal block:
-
-\begin{quote}
-\begin{alltt}
-Compound 2, literal.
-\end{alltt}
-\end{quote}
-Compound 2, this is a test.
-
-
+\begin{DUclass}{compound}
Compound 3, only consisting of one paragraph.
+\end{DUclass}
-
+\begin{DUclass}{compound}
\begin{quote}
\begin{alltt}
Compound 4.
@@ -1304,25 +1312,29 @@
This one starts with a literal block.
\end{alltt}
\end{quote}
-Compound 4, a paragraph.
+Compound 4, paragraph following the literal block.
+\end{DUclass}
Now something \emph{really} perverted – a nested compound block. This is
just to test that it works at all; the results don’t have to be
meaningful.
-
+\begin{DUclass}{compound}
Compound 5, block 1 (a paragraph).
+\begin{DUclass}{compound}
+Compound 6 is block 2 in compound 5.
-Compound 6, block 2 in compound 5.
-
Compound 6, another paragraph.
+\end{DUclass}
Compound 5, block 3 (a paragraph).
+\end{DUclass}
+\begin{DUclass}{compound}
+Compound 7, tests the inclusion of various block-level
+elements in one logical paragraph. First a table,
-Compound 7, with a table inside:
-
\setlength{\DUtablewidth}{\linewidth}
\begin{longtable*}[c]{|p{0.249\DUtablewidth}|p{0.249\DUtablewidth}|p{0.249\DUtablewidth}|}
\hline
@@ -1346,11 +1358,50 @@
\\
\hline
\end{longtable*}
-Compound 7, a paragraph after the table.
+followed by a paragraph. This physical paragraph is
+actually a continuation of the paragraph before the table. It is followed
+by
+\begin{quote}
+a quote and
+\end{quote}
+\begin{enumerate}
+\item an enumerated list,
+\end{enumerate}
+a paragraph,
+\begin{DUoptionlist}
+\item[-{}-an] option list,
+\end{DUoptionlist}
+a paragraph,
+\begin{DUfieldlist}
+\item[{a field:}]
+list,
+\end{DUfieldlist}
+a paragraph,
+\begin{description}
+\item[{a definition}] \leavevmode
+list,
-Compound 7, another paragraph.
+\end{description}
+a paragraph, an image:
+\includegraphics{../../../docs/user/rst/images/biohazard.png}
+a paragraph,
+\begin{DUlineblock}{0em}
+\item[] a line
+\item[] block,
+\end{DUlineblock}
+a paragraph followed by a comment,
+% this is a comment
+a paragraph, a
+\DUadmonition[note]{
+\DUtitle[note]{Note}
+with content
+}
+and the final paragraph of the compound 7.
+\end{DUclass}
+
+
\subsubsection{2.14.8~~~Parsed Literal Blocks%
\label{parsed-literal-blocks}%
}
Modified: trunk/docutils/test/functional/expected/standalone_rst_pseudoxml.txt
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_pseudoxml.txt 2017-03-21 15:45:28 UTC (rev 8051)
+++ trunk/docutils/test/functional/expected/standalone_rst_pseudoxml.txt 2017-03-22 14:29:01 UTC (rev 8052)
@@ -1663,27 +1663,38 @@
<generated classes="sectnum">
2.14.7
Compound Paragraph
+ <paragraph>
+ The
+ <emphasis>
+ compound
+ directive is used to create a "compound paragraph", which
+ is a single logical paragraph containing multiple physical body
+ elements. For example:
+ <compound>
+ <paragraph>
+ The 'rm' command is very dangerous. If you are logged
+ in as root and enter
+ <literal_block xml:space="preserve">
+ cd /
+ rm -rf *
+ <paragraph>
+ you will erase the entire contents of your file system.
+ <paragraph>
+ Test the handling and display of compound paragraphs:
<compound classes="some-class">
<paragraph>
- Compound 1, paragraph 1.
+ Compound 2, paragraph 1,
<paragraph>
- Compound 1, paragraph 2.
+ compound 2, paragraph 2,
<bullet_list bullet="*">
<list_item>
<paragraph>
- Compound 1, list item one.
+ list item 1,
<list_item>
<paragraph>
- Compound 1, list item two.
- <paragraph>
- Another compound statement:
- <compound>
+ list item 2,
<paragraph>
- Compound 2, a literal block:
- <literal_block xml:space="preserve">
- Compound 2, literal.
- <paragraph>
- Compound 2, this is a test.
+ compound 2, paragraph 3.
<compound>
<paragraph>
Compound 3, only consisting of one paragraph.
@@ -1692,7 +1703,7 @@
Compound 4.
This one starts with a literal block.
<paragraph>
- Compound 4, a paragraph.
+ Compound 4, paragraph following the literal block.
<paragraph>
...
[truncated message content] |
|
From: <mi...@us...> - 2017-03-31 12:13:56
|
Revision: 8053
http://sourceforge.net/p/docutils/code/8053
Author: milde
Date: 2017-03-31 12:13:54 +0000 (Fri, 31 Mar 2017)
Log Message:
-----------
Update smartquotes:
* use the rules of the `Imprimerie nationale` as French default
(full NBSP inside guillemets).
* do not invert " and ' in en-UK: expect British authors to use
U+0027 APOSTROPHE for primary quotes and " for secondary quotes in the source.
Set ``--smart-quotes=alt`` (or use en-UK-x-altquot) for inversion
by `smartquotes`.
* do not call ``educate_backticks`` in the SmartQuotes transform:
backticks have a special meaning in rST.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/transforms/universal.py
trunk/docutils/docutils/utils/smartquotes.py
trunk/docutils/test/test_transforms/test_smartquotes.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2017-03-22 14:29:01 UTC (rev 8052)
+++ trunk/docutils/HISTORY.txt 2017-03-31 12:13:54 UTC (rev 8053)
@@ -56,8 +56,8 @@
* docutils/utils/smartquotes.py:
- - Update quote definitions for languages et, fi, ro, sv, tr, uk.
- - New quote definitions for hr, hsb, hu, lv, sl.
+ - Update quote definitions for languages et, fi, fr, ro, sv, tr, uk.
+ - New quote definitions for hr, hsb, hu, lv, sh, sl, sr.
- Fix [ 313 ] Differentiate apostrophe from closing single quote
(if possible).
Modified: trunk/docutils/docutils/transforms/universal.py
===================================================================
--- trunk/docutils/docutils/transforms/universal.py 2017-03-22 14:29:01 UTC (rev 8052)
+++ trunk/docutils/docutils/transforms/universal.py 2017-03-31 12:13:54 UTC (rev 8053)
@@ -288,7 +288,7 @@
# Iterator educating quotes in plain text:
# '2': set all, using old school en- and em- dash shortcuts
teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes),
- attr='2', language=lang)
+ attr='qDe', language=lang)
for txtnode, newtext in zip(txtnodes, teacher):
txtnode.parent.replace(txtnode, nodes.Text(newtext))
Modified: trunk/docutils/docutils/utils/smartquotes.py
===================================================================
--- trunk/docutils/docutils/utils/smartquotes.py 2017-03-22 14:29:01 UTC (rev 8052)
+++ trunk/docutils/docutils/utils/smartquotes.py 2017-03-31 12:13:54 UTC (rev 8053)
@@ -192,7 +192,7 @@
"1"
Performs default SmartyPants transformations: quotes (including
\`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash)
- is used to signify an em-dash; there is no support for en-dashes.
+ is used to signify an em-dash; there is no support for en-dashes
"2"
Same as smarty_pants="1", except that it uses the old-school typewriter
@@ -217,7 +217,8 @@
example, to educate normal quotes and em-dashes, but not ellipses or
\`\`backticks'' -style quotes:
-``py['smartypants_attributes'] = "1"``
+E.g. ``py['smartypants_attributes'] = "1"`` is equivalent to
+``py['smartypants_attributes'] = "qBde"``.
"q"
Educates normal quote characters: (") and (').
@@ -329,13 +330,12 @@
will get a single closing guillemet instead of an apostrophe.
This can be prevented by use use of the curly apostrophe character (’) in
-the source:
+the source::
- .. class:: language-de-CH
+ - "Er sagt: 'Ich fass' es nicht.'"
+ + "Er sagt: 'Ich fass’ es nicht.'"
- "Er sagt: 'Ich fass' es nicht.'" → "Er sagt: 'Ich fass’ es nicht.'"
-
Version History
===============
@@ -399,20 +399,14 @@
endash = u'–' # "–" EN DASH
emdash = u'—' # "—" EM DASH
ellipsis = u'…' # "…" HORIZONTAL ELLIPSIS
- apostrophe = u'’'
+ apostrophe = u'’' # "’" RIGHT SINGLE QUOTATION MARK
# quote characters (language-specific, set in __init__())
- #
- # English smart quotes (open primary, close primary, open secondary, close
- # secondary) are:
- # opquote = u'“' # "“" LEFT DOUBLE QUOTATION MARK
- # cpquote = u'”' # "”" RIGHT DOUBLE QUOTATION MARK
- # osquote = u'‘' # "‘" LEFT SINGLE QUOTATION MARK
- # csquote = u'’' # "’" RIGHT SINGLE QUOTATION MARK
- # For other languages see:
# http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks
# http://de.wikipedia.org/wiki/Anf%C3%BChrungszeichen#Andere_Sprachen
# https://fr.wikipedia.org/wiki/Guillemet
+ # http://typographisme.net/post/Les-espaces-typographiques-et-le-web
+ # http://www.btb.termiumplus.gc.ca/tpv2guides/guides/redac/index-fra.html
# https://en.wikipedia.org/wiki/Hebrew_punctuation#Quotation_marks
# http://www.tustep.uni-tuebingen.de/bi/bi00/bi001t1-anfuehrung.pdf
quotes = {'af': u'“”‘’',
@@ -428,7 +422,7 @@
'de-ch': u'«»‹›',
'el': u'«»“”',
'en': u'“”‘’',
- 'en-uk': u'‘’“”',
+ 'en-uk-x-altquot': u'‘’“”', # Attention: " → ‘ and ' → “ !
'eo': u'“”‘’',
'es': u'«»“”',
'es-x-altquot': u'“”‘’',
@@ -437,10 +431,10 @@
'eu': u'«»‹›',
'fi': u'””’’',
'fi-x-altquot': u'»»››',
- 'fr': (u'« ', u' »', u'‹ ', u' ›'), # with narrow no-break space
- 'fr-x-altquot': u'«»‹›', # for use with manually set spaces
- # 'fr-x-altquot2': (u'“ ', u' ”', u'‘ ', u' ’'), # rarely used
+ 'fr': (u'« ', u' »', u'“', u'”'), # full no-break space
+ 'fr-x-altquot': (u'« ', u' »', u'“', u'”'), # narrow no-break space
'fr-ch': u'«»‹›',
+ 'fr-ch-x-altquot': (u'« ', u' »', u'‹ ', u' ›'), # narrow no-break space, http://typoguide.ch/
'gl': u'«»“”',
'he': u'”“»«',
'he-x-altquot': u'„”‚’',
@@ -465,8 +459,11 @@
'pt-br': u'“”‘’',
'ro': u'„”«»',
'ru': u'«»„“',
+ 'sh': u'„”‚’',
+ 'sh-x-altquot': u'»«›‹',
'sk': u'„“‚‘',
'sk-x-altquot': u'»«›‹',
+ 'sr': u'„”’’',
'sl': u'„“‚‘',
'sl-x-altquot': u'»«›‹',
'sv': u'””’’',
@@ -637,7 +634,8 @@
text = re.sub(r"""'"(?=\w)""", smart.osquote+smart.opquote, text)
# Special case for decade abbreviations (the '80s):
- text = re.sub(r"""\b'(?=\d{2}s)""", smart.csquote, text)
+ if language.startswith('en'): # TODO similar cases in other languages?
+ text = re.sub(r"""'(?=\d{2}s)""", smart.apostrophe, text, re.UNICODE)
close_class = r"""[^\ \t\r\n\[\{\(\-]"""
dec_dashes = r"""–|—"""
@@ -661,6 +659,8 @@
if smart.csquote != smart.apostrophe:
apostrophe_regex = re.compile(r"(?<=(\w|\d))'(?=\w)", re.UNICODE)
text = apostrophe_regex.sub(smart.apostrophe, text)
+ # TODO: keep track of quoting level to recognize apostrophe in, e.g.,
+ # "Ich fass' es nicht."
closing_single_quotes_regex = re.compile(r"""
(%s)
Modified: trunk/docutils/test/test_transforms/test_smartquotes.py
===================================================================
--- trunk/docutils/test/test_transforms/test_smartquotes.py 2017-03-22 14:29:01 UTC (rev 8052)
+++ trunk/docutils/test/test_transforms/test_smartquotes.py 2017-03-31 12:13:54 UTC (rev 8053)
@@ -157,9 +157,10 @@
German "smart quotes" and 'secondary smart quotes'.
-.. class:: language-en-UK
+.. class:: language-en-UK-x-altquot
-British "quotes" use single and 'secondary quotes' double quote signs.
+British "primary quotes" use single and
+'secondary quotes' double quote signs.
.. class:: language-foo
@@ -173,13 +174,14 @@
<document source="test data">
<paragraph classes="language-de">
German „smart quotes“ and ‚secondary smart quotes‘.
- <paragraph classes="language-en-uk">
- British ‘quotes’ use single and “secondary quotes” double quote signs.
+ <paragraph classes="language-en-uk-x-altquot">
+ British ‘primary quotes’ use single and
+ “secondary quotes” double quote signs.
<paragraph classes="language-foo">
"Quoting style" for unknown languages is 'ASCII'.
<paragraph classes="language-de-x-altquot">
Alternative German »smart quotes« and ›secondary smart quotes‹.
- <system_message level="2" line="11" source="test data" type="WARNING">
+ <system_message level="2" line="12" source="test data" type="WARNING">
<paragraph>
No smart quotes defined for language "foo".
"""],
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-04-06 13:10:37
|
Revision: 8055
http://sourceforge.net/p/docutils/code/8055
Author: milde
Date: 2017-04-06 13:10:34 +0000 (Thu, 06 Apr 2017)
Log Message:
-----------
Do not add a second ID to problematic references.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/transforms/references.py
trunk/docutils/test/functional/expected/standalone_rst_docutils_xml.xml
trunk/docutils/test/functional/expected/standalone_rst_html4css1.html
trunk/docutils/test/functional/expected/standalone_rst_html5.html
trunk/docutils/test/functional/expected/standalone_rst_latex.tex
trunk/docutils/test/functional/expected/standalone_rst_pseudoxml.txt
trunk/docutils/test/functional/expected/standalone_rst_xetex.tex
trunk/docutils/test/test_transforms/test_hyperlinks.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2017-03-31 15:47:13 UTC (rev 8054)
+++ trunk/docutils/HISTORY.txt 2017-04-06 13:10:34 UTC (rev 8055)
@@ -49,6 +49,10 @@
- Rework patch [ 120 ] (revert change to ``Table.get_column_widths()``
that led to problems in an application with a custom table directive).
+* docutils/transforms/references.py
+
+ - Don't add a second ID to problematic references.
+
* docutils/utils/__init__.py:
- Added ``split_escaped_whitespace`` function, support for escaped
Modified: trunk/docutils/docutils/transforms/references.py
===================================================================
--- trunk/docutils/docutils/transforms/references.py 2017-03-31 15:47:13 UTC (rev 8054)
+++ trunk/docutils/docutils/transforms/references.py 2017-04-06 13:10:34 UTC (rev 8055)
@@ -893,7 +893,10 @@
msgid = self.document.set_id(msg)
prb = nodes.problematic(
node.rawsource, node.rawsource, refid=msgid)
- prbid = self.document.set_id(prb)
+ try:
+ prbid = node['ids'][0]
+ except IndexError:
+ prbid = self.document.set_id(prb)
msg.add_backref(prbid)
node.replace_self(prb)
else:
Modified: trunk/docutils/test/functional/expected/standalone_rst_docutils_xml.xml
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_docutils_xml.xml 2017-03-31 15:47:13 UTC (rev 8054)
+++ trunk/docutils/test/functional/expected/standalone_rst_docutils_xml.xml 2017-04-06 13:10:34 UTC (rev 8055)
@@ -700,7 +700,7 @@
<footnote ids="id16" names="4">
<label>4</label>
<paragraph>Here's an unreferenced footnote, with a reference to a
- nonexistent footnote: <problematic ids="id84 id17" refid="id83">[5]_</problematic>.</paragraph>
+ nonexistent footnote: <problematic ids="id17" refid="id83">[5]_</problematic>.</paragraph>
</footnote>
</section>
<section ids="citations" names="citations">
@@ -710,7 +710,7 @@
<paragraph>Citations are text-labeled footnotes. They may be
rendered separately and differently from footnotes.</paragraph>
</citation>
- <paragraph>Here's a reference to the above, <citation_reference ids="id18" refid="cit2002">CIT2002</citation_reference>, and a <problematic ids="id86 id19" refid="id85">[nonexistent]_</problematic>
+ <paragraph>Here's a reference to the above, <citation_reference ids="id18" refid="cit2002">CIT2002</citation_reference>, and a <problematic ids="id19" refid="id84">[nonexistent]_</problematic>
citation.</paragraph>
<target refid="another-target"></target>
</section>
@@ -728,7 +728,7 @@
<paragraph>Targets may be indirect and anonymous. Thus <reference anonymous="1" name="this phrase" refid="targets">this phrase</reference> may also
refer to the <reference name="Targets" refid="targets">Targets</reference> section.</paragraph>
<target anonymous="1" ids="id20" refid="targets"></target>
- <paragraph>Here's a <problematic ids="id88" refid="id87">`hyperlink reference without a target`_</problematic>, which generates an
+ <paragraph>Here's a <problematic ids="id86" refid="id85">`hyperlink reference without a target`_</problematic>, which generates an
error.</paragraph>
<section dupnames="duplicate\ target\ names" ids="duplicate-target-names">
<title auto="1" refid="id53"><generated classes="sectnum">2.13.1 </generated>Duplicate Target Names</title>
@@ -743,7 +743,7 @@
</system_message>
<paragraph>Since there are two "Duplicate Target Names" section headers, we
cannot uniquely refer to either of them by name. If we try to (like
- this: <problematic ids="id90" refid="id89">`Duplicate Target Names`_</problematic>), an error is generated.</paragraph>
+ this: <problematic ids="id88" refid="id87">`Duplicate Target Names`_</problematic>), an error is generated.</paragraph>
</section>
</section>
<section ids="directives" names="directives">
@@ -1709,16 +1709,16 @@
<system_message backrefs="id24" ids="id23" level="3" line="104" source="functional/input/data/standard.txt" type="ERROR">
<paragraph>Undefined substitution referenced: "problematic".</paragraph>
</system_message>
- <system_message backrefs="id84" ids="id83" level="3" line="391" source="functional/input/data/standard.txt" type="ERROR">
+ <system_message backrefs="id17" ids="id83" level="3" line="391" source="functional/input/data/standard.txt" type="ERROR">
<paragraph>Unknown target name: "5".</paragraph>
</system_message>
- <system_message backrefs="id86" ids="id85" level="3" line="400" source="functional/input/data/standard.txt" type="ERROR">
+ <system_message backrefs="id19" ids="id84" level="3" line="400" source="functional/input/data/standard.txt" type="ERROR">
<paragraph>Unknown target name: "nonexistent".</paragraph>
</system_message>
- <system_message backrefs="id88" ids="id87" level="3" line="427" source="functional/input/data/standard.txt" type="ERROR">
+ <system_message backrefs="id86" ids="id85" level="3" line="427" source="functional/input/data/standard.txt" type="ERROR">
<paragraph>Unknown target name: "hyperlink reference without a target".</paragraph>
</system_message>
- <system_message backrefs="id90" ids="id89" level="3" line="440" source="functional/input/data/standard.txt" type="ERROR">
+ <system_message backrefs="id88" ids="id87" level="3" line="440" source="functional/input/data/standard.txt" type="ERROR">
<paragraph>Duplicate target name, cannot be used as a unique reference: "duplicate target names".</paragraph>
</system_message>
<system_message level="1" line="163" source="functional/input/data/standard.txt" type="INFO">
Modified: trunk/docutils/test/functional/expected/standalone_rst_html4css1.html
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_html4css1.html 2017-03-31 15:47:13 UTC (rev 8054)
+++ trunk/docutils/test/functional/expected/standalone_rst_html4css1.html 2017-04-06 13:10:34 UTC (rev 8055)
@@ -506,7 +506,7 @@
<colgroup><col class="label" /><col /></colgroup>
<tbody valign="top">
<tr><td class="label">[4]</td><td>Here's an unreferenced footnote, with a reference to a
-nonexistent footnote: <a href="#id86"><span class="problematic" id="id87"><span id="id17"></span>[5]_</span></a>.</td></tr>
+nonexistent footnote: <a href="#id86"><span class="problematic" id="id17">[5]_</span></a>.</td></tr>
</tbody>
</table>
</div>
@@ -519,7 +519,7 @@
rendered separately and differently from footnotes.</td></tr>
</tbody>
</table>
-<p>Here's a reference to the above, <a class="citation-reference" href="#cit2002" id="id18">[CIT2002]</a>, and a <a href="#id88"><span class="problematic" id="id89"><span id="id19"></span>[nonexistent]_</span></a>
+<p>Here's a reference to the above, <a class="citation-reference" href="#cit2002" id="id18">[CIT2002]</a>, and a <a href="#id87"><span class="problematic" id="id19">[nonexistent]_</span></a>
citation.</p>
</div>
<div class="section" id="targets">
@@ -533,7 +533,7 @@
"<a class="reference external" href="http://www.python.org/">Python</a> <a class="footnote-reference" href="#id25" id="id27">[5]</a>".</p>
<p>Targets may be indirect and anonymous. Thus <a class="reference internal" href="#targets">this phrase</a> may also
refer to the <a class="reference internal" href="#targets">Targets</a> section.</p>
-<p>Here's a <a href="#id90"><span class="problematic" id="id91">`hyperlink reference without a target`_</span></a>, which generates an
+<p>Here's a <a href="#id88"><span class="problematic" id="id89">`hyperlink reference without a target`_</span></a>, which generates an
error.</p>
<div class="section" id="duplicate-target-names">
<h3><a class="toc-backref" href="#id53">2.13.1 Duplicate Target Names</a></h3>
@@ -545,7 +545,7 @@
<h3><a class="toc-backref" href="#id54">2.13.2 Duplicate Target Names</a></h3>
<p>Since there are two "Duplicate Target Names" section headers, we
cannot uniquely refer to either of them by name. If we try to (like
-this: <a href="#id92"><span class="problematic" id="id93">`Duplicate Target Names`_</span></a>), an error is generated.</p>
+this: <a href="#id90"><span class="problematic" id="id91">`Duplicate Target Names`_</span></a>), an error is generated.</p>
</div>
</div>
<div class="section" id="directives">
@@ -1369,16 +1369,16 @@
<p class="system-message-title">System Message: ERROR/3 (<tt class="docutils">functional/input/data/standard.txt</tt>, line 104); <em><a href="#id24">backlink</a></em></p>
Undefined substitution referenced: "problematic".</div>
<div class="system-message" id="id86">
-<p class="system-message-title">System Message: ERROR/3 (<tt class="docutils">functional/input/data/standard.txt</tt>, line 391); <em><a href="#id87">backlink</a></em></p>
+<p class="system-message-title">System Message: ERROR/3 (<tt class="docutils">functional/input/data/standard.txt</tt>, line 391); <em><a href="#id17">backlink</a></em></p>
Unknown target name: "5".</div>
+<div class="system-message" id="id87">
+<p class="system-message-title">System Message: ERROR/3 (<tt class="docutils">functional/input/data/standard.txt</tt>, line 400); <em><a href="#id19">backlink</a></em></p>
+Unknown target name: "nonexistent".</div>
<div class="system-message" id="id88">
-<p class="system-message-title">System Message: ERROR/3 (<tt class="docutils">functional/input/data/standard.txt</tt>, line 400); <em><a href="#id89">backlink</a></em></p>
-Unknown target name: "nonexistent".</div>
+<p class="system-message-title">System Message: ERROR/3 (<tt class="docutils">functional/input/data/standard.txt</tt>, line 427); <em><a href="#id89">backlink</a></em></p>
+Unknown target name: "hyperlink reference without a target".</div>
<div class="system-message" id="id90">
-<p class="system-message-title">System Message: ERROR/3 (<tt class="docutils">functional/input/data/standard.txt</tt>, line 427); <em><a href="#id91">backlink</a></em></p>
-Unknown target name: "hyperlink reference without a target".</div>
-<div class="system-message" id="id92">
-<p class="system-message-title">System Message: ERROR/3 (<tt class="docutils">functional/input/data/standard.txt</tt>, line 440); <em><a href="#id93">backlink</a></em></p>
+<p class="system-message-title">System Message: ERROR/3 (<tt class="docutils">functional/input/data/standard.txt</tt>, line 440); <em><a href="#id91">backlink</a></em></p>
Duplicate target name, cannot be used as a unique reference: "duplicate target names".</div>
</div>
</div>
Modified: trunk/docutils/test/functional/expected/standalone_rst_html5.html
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_html5.html 2017-03-31 15:47:13 UTC (rev 8054)
+++ trunk/docutils/test/functional/expected/standalone_rst_html5.html 2017-04-06 13:10:34 UTC (rev 8055)
@@ -489,7 +489,7 @@
</dd>
<dt class="label" id="id16"><span class="brackets">4</span></dt>
<dd><p>Here's an unreferenced footnote, with a reference to a
-nonexistent footnote: <a href="#id96"><span class="problematic" id="id97"><span id="id17"></span>[5]_</span></a>.</p>
+nonexistent footnote: <a href="#id96"><span class="problematic" id="id17">[5]_</span></a>.</p>
</dd>
</dl>
</div>
@@ -501,7 +501,7 @@
rendered separately and differently from footnotes.</p>
</dd>
</dl>
-<p>Here's a reference to the above, <a class="citation-reference" href="#cit2002" id="id18">[CIT2002]</a>, and a <a href="#id98"><span class="problematic" id="id99"><span id="id19"></span>[nonexistent]_</span></a>
+<p>Here's a reference to the above, <a class="citation-reference" href="#cit2002" id="id18">[CIT2002]</a>, and a <a href="#id97"><span class="problematic" id="id19">[nonexistent]_</span></a>
citation.</p>
</div>
<div class="section" id="targets">
@@ -515,7 +515,7 @@
"<a class="reference external" href="http://www.python.org/">Python</a> <a class="footnote-reference brackets" href="#id25" id="id27">5</a>".</p>
<p>Targets may be indirect and anonymous. Thus <a class="reference internal" href="#targets">this phrase</a> may also
refer to the <a class="reference internal" href="#targets">Targets</a> section.</p>
-<p>Here's a <a href="#id100"><span class="problematic" id="id101">`hyperlink reference without a target`_</span></a>, which generates an
+<p>Here's a <a href="#id98"><span class="problematic" id="id99">`hyperlink reference without a target`_</span></a>, which generates an
error.</p>
<div class="section" id="duplicate-target-names">
<h3><a class="toc-backref" href="#id57"><span class="sectnum">2.13.1</span> Duplicate Target Names</a></h3>
@@ -527,7 +527,7 @@
<h3><a class="toc-backref" href="#id58"><span class="sectnum">2.13.2</span> Duplicate Target Names</a></h3>
<p>Since there are two "Duplicate Target Names" section headers, we
cannot uniquely refer to either of them by name. If we try to (like
-this: <a href="#id102"><span class="problematic" id="id103">`Duplicate Target Names`_</span></a>), an error is generated.</p>
+this: <a href="#id100"><span class="problematic" id="id101">`Duplicate Target Names`_</span></a>), an error is generated.</p>
</div>
</div>
<div class="section" id="directives">
@@ -1544,19 +1544,19 @@
<p>Undefined substitution referenced: "problematic".</p>
</div>
<div class="system-message" id="id96">
-<p class="system-message-title">System Message: ERROR/3 (<span class="docutils literal">functional/input/data/standard.txt</span>, line 391); <em><a href="#id97">backlink</a></em></p>
+<p class="system-message-title">System Message: ERROR/3 (<span class="docutils literal">functional/input/data/standard.txt</span>, line 391); <em><a href="#id17">backlink</a></em></p>
<p>Unknown target name: "5".</p>
</div>
-<div class="system-message" id="id98">
-<p class="system-message-title">System Message: ERROR/3 (<span class="docutils literal">functional/input/data/standard.txt</span>, line 400); <em><a href="#id99">backlink</a></em></p>
+<div class="system-message" id="id97">
+<p class="system-message-title">System Message: ERROR/3 (<span class="docutils literal">functional/input/data/standard.txt</span>, line 400); <em><a href="#id19">backlink</a></em></p>
<p>Unknown target name: "nonexistent".</p>
</div>
-<div class="system-message" id="id100">
-<p class="system-message-title">System Message: ERROR/3 (<span class="docutils literal">functional/input/data/standard.txt</span>, line 427); <em><a href="#id101">backlink</a></em></p>
+<div class="system-message" id="id98">
+<p class="system-message-title">System Message: ERROR/3 (<span class="docutils literal">functional/input/data/standard.txt</span>, line 427); <em><a href="#id99">backlink</a></em></p>
<p>Unknown target name: "hyperlink reference without a target".</p>
</div>
-<div class="system-message" id="id102">
-<p class="system-message-title">System Message: ERROR/3 (<span class="docutils literal">functional/input/data/standard.txt</span>, line 440); <em><a href="#id103">backlink</a></em></p>
+<div class="system-message" id="id100">
+<p class="system-message-title">System Message: ERROR/3 (<span class="docutils literal">functional/input/data/standard.txt</span>, line 440); <em><a href="#id101">backlink</a></em></p>
<p>Duplicate target name, cannot be used as a unique reference: "duplicate target names".</p>
</div>
</div>
Modified: trunk/docutils/test/functional/expected/standalone_rst_latex.tex
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_latex.tex 2017-03-31 15:47:13 UTC (rev 8054)
+++ trunk/docutils/test/functional/expected/standalone_rst_latex.tex 2017-04-06 13:10:34 UTC (rev 8055)
@@ -751,7 +751,6 @@
\DUfootnotetext{id16}{id16}{4}{%
Here’s an unreferenced footnote, with a reference to a
nonexistent footnote:%
-\raisebox{1em}{\hypertarget{id46}{}}%
\raisebox{1em}{\hypertarget{id17}{}}\hyperlink{id45}{\textbf{\color{red}{[}5{]}\_}}.
}
@@ -765,8 +764,7 @@
\end{figure}
Here’s a reference to the above, \hyperlink{cit2002}{[CIT2002]}, and a %
-\raisebox{1em}{\hypertarget{id48}{}}%
-\raisebox{1em}{\hypertarget{id19}{}}\hyperlink{id47}{\textbf{\color{red}{[}nonexistent{]}\_}}
+\raisebox{1em}{\hypertarget{id19}{}}\hyperlink{id46}{\textbf{\color{red}{[}nonexistent{]}\_}}
citation.
@@ -790,7 +788,7 @@
refer to the \hyperref[targets]{Targets} section.
Here’s a %
-\raisebox{1em}{\hypertarget{id50}{}}\hyperlink{id49}{\textbf{\color{red}`hyperlink reference without a target`\_}}, which generates an
+\raisebox{1em}{\hypertarget{id48}{}}\hyperlink{id47}{\textbf{\color{red}`hyperlink reference without a target`\_}}, which generates an
error.
@@ -810,7 +808,7 @@
Since there are two “Duplicate Target Names” section headers, we
cannot uniquely refer to either of them by name. If we try to (like
this: %
-\raisebox{1em}{\hypertarget{id52}{}}\hyperlink{id51}{\textbf{\color{red}`Duplicate Target Names`\_}}), an error is generated.
+\raisebox{1em}{\hypertarget{id50}{}}\hyperlink{id49}{\textbf{\color{red}`Duplicate Target Names`\_}}), an error is generated.
\subsection{2.14~~~Directives%
@@ -2811,37 +2809,37 @@
{\color{red}ERROR/3} in \texttt{functional/input/data/standard.txt}, line~391
-\hyperlink{id46}{
+\hyperlink{id17}{
Unknown target name: \textquotedbl{}5\textquotedbl{}.
}}
\DUadmonition[system-message]{
\DUtitle[system-message]{system-message}
-\raisebox{1em}{\hypertarget{id47}{}}
+\raisebox{1em}{\hypertarget{id46}{}}
{\color{red}ERROR/3} in \texttt{functional/input/data/standard.txt}, line~400
-\hyperlink{id48}{
+\hyperlink{id19}{
Unknown target name: \textquotedbl{}nonexistent\textquotedbl{}.
}}
\DUadmonition[system-message]{
\DUtitle[system-message]{system-message}
-\raisebox{1em}{\hypertarget{id49}{}}
+\raisebox{1em}{\hypertarget{id47}{}}
{\color{red}ERROR/3} in \texttt{functional/input/data/standard.txt}, line~427
-\hyperlink{id50}{
+\hyperlink{id48}{
Unknown target name: \textquotedbl{}hyperlink reference without a target\textquotedbl{}.
}}
\DUadmonition[system-message]{
\DUtitle[system-message]{system-message}
-\raisebox{1em}{\hypertarget{id51}{}}
+\raisebox{1em}{\hypertarget{id49}{}}
{\color{red}ERROR/3} in \texttt{functional/input/data/standard.txt}, line~440
-\hyperlink{id52}{
+\hyperlink{id50}{
Duplicate target name, cannot be used as a unique reference: \textquotedbl{}duplicate target names\textquotedbl{}.
}}
Modified: trunk/docutils/test/functional/expected/standalone_rst_pseudoxml.txt
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_pseudoxml.txt 2017-03-31 15:47:13 UTC (rev 8054)
+++ trunk/docutils/test/functional/expected/standalone_rst_pseudoxml.txt 2017-04-06 13:10:34 UTC (rev 8055)
@@ -1044,7 +1044,7 @@
<paragraph>
Here's an unreferenced footnote, with a reference to a
nonexistent footnote:
- <problematic ids="id84 id17" refid="id83">
+ <problematic ids="id17" refid="id83">
[5]_
.
<section ids="citations" names="citations">
@@ -1063,7 +1063,7 @@
<citation_reference ids="id18" refid="cit2002">
CIT2002
, and a
- <problematic ids="id86 id19" refid="id85">
+ <problematic ids="id19" refid="id84">
[nonexistent]_
citation.
@@ -1114,7 +1114,7 @@
<target anonymous="1" ids="id20" refid="targets">
<paragraph>
Here's a
- <problematic ids="id88" refid="id87">
+ <problematic ids="id86" refid="id85">
`hyperlink reference without a target`_
, which generates an
error.
@@ -1139,7 +1139,7 @@
Since there are two "Duplicate Target Names" section headers, we
cannot uniquely refer to either of them by name. If we try to (like
this:
- <problematic ids="id90" refid="id89">
+ <problematic ids="id88" refid="id87">
`Duplicate Target Names`_
), an error is generated.
<section ids="directives" names="directives">
@@ -2307,16 +2307,16 @@
<system_message backrefs="id24" ids="id23" level="3" line="104" source="functional/input/data/standard.txt" type="ERROR">
<paragraph>
Undefined substitution referenced: "problematic".
- <system_message backrefs="id84" ids="id83" level="3" line="391" source="functional/input/data/standard.txt" type="ERROR">
+ <system_message backrefs="id17" ids="id83" level="3" line="391" source="functional/input/data/standard.txt" type="ERROR">
<paragraph>
Unknown target name: "5".
- <system_message backrefs="id86" ids="id85" level="3" line="400" source="functional/input/data/standard.txt" type="ERROR">
+ <system_message backrefs="id19" ids="id84" level="3" line="400" source="functional/input/data/standard.txt" type="ERROR">
<paragraph>
Unknown target name: "nonexistent".
- <system_message backrefs="id88" ids="id87" level="3" line="427" source="functional/input/data/standard.txt" type="ERROR">
+ <system_message backrefs="id86" ids="id85" level="3" line="427" source="functional/input/data/standard.txt" type="ERROR">
<paragraph>
Unknown target name: "hyperlink reference without a target".
- <system_message backrefs="id90" ids="id89" level="3" line="440" source="functional/input/data/standard.txt" type="ERROR">
+ <system_message backrefs="id88" ids="id87" level="3" line="440" source="functional/input/data/standard.txt" type="ERROR">
<paragraph>
Duplicate target name, cannot be used as a unique reference: "duplicate target names".
<system_message level="1" line="163" source="functional/input/data/standard.txt" type="INFO">
Modified: trunk/docutils/test/functional/expected/standalone_rst_xetex.tex
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_xetex.tex 2017-03-31 15:47:13 UTC (rev 8054)
+++ trunk/docutils/test/functional/expected/standalone_rst_xetex.tex 2017-04-06 13:10:34 UTC (rev 8055)
@@ -756,7 +756,6 @@
\DUfootnotetext{id16}{id16}{4}{%
Here’s an unreferenced footnote, with a reference to a
nonexistent footnote:%
-\raisebox{1em}{\hypertarget{id48}{}}%
\raisebox{1em}{\hypertarget{id17}{}}\hyperlink{id47}{\textbf{\color{red}{[}5{]}\_}}.
}
@@ -770,8 +769,7 @@
\end{figure}
Here’s a reference to the above, \hyperlink{cit2002}{[CIT2002]}, and a %
-\raisebox{1em}{\hypertarget{id50}{}}%
-\raisebox{1em}{\hypertarget{id19}{}}\hyperlink{id49}{\textbf{\color{red}{[}nonexistent{]}\_}}
+\raisebox{1em}{\hypertarget{id19}{}}\hyperlink{id48}{\textbf{\color{red}{[}nonexistent{]}\_}}
citation.
@@ -795,7 +793,7 @@
refer to the \hyperref[targets]{Targets} section.
Here’s a %
-\raisebox{1em}{\hypertarget{id52}{}}\hyperlink{id51}{\textbf{\color{red}`hyperlink reference without a target`\_}}, which generates an
+\raisebox{1em}{\hypertarget{id50}{}}\hyperlink{id49}{\textbf{\color{red}`hyperlink reference without a target`\_}}, which generates an
error.
@@ -815,7 +813,7 @@
Since there are two “Duplicate Target Names” section headers, we
cannot uniquely refer to either of them by name. If we try to (like
this: %
-\raisebox{1em}{\hypertarget{id54}{}}\hyperlink{id53}{\textbf{\color{red}`Duplicate Target Names`\_}}), an error is generated.
+\raisebox{1em}{\hypertarget{id52}{}}\hyperlink{id51}{\textbf{\color{red}`Duplicate Target Names`\_}}), an error is generated.
\subsection{2.14 Directives%
@@ -2832,37 +2830,37 @@
{\color{red}ERROR/3} in \texttt{functional/input/data/standard.txt}, line~391
-\hyperlink{id48}{
+\hyperlink{id17}{
Unknown target name: \textquotedbl{}5\textquotedbl{}.
}}
\DUadmonition[system-message]{
\DUtitle[system-message]{system-message}
-\raisebox{1em}{\hypertarget{id49}{}}
+\raisebox{1em}{\hypertarget{id48}{}}
{\color{red}ERROR/3} in \texttt{functional/input/data/standard.txt}, line~400
-\hyperlink{id50}{
+\hyperlink{id19}{
Unknown target name: \textquotedbl{}nonexistent\textquotedbl{}.
}}
\DUadmonition[system-message]{
\DUtitle[system-message]{system-message}
-\raisebox{1em}{\hypertarget{id51}{}}
+\raisebox{1em}{\hypertarget{id49}{}}
{\color{red}ERROR/3} in \texttt{functional/input/data/standard.txt}, line~427
-\hyperlink{id52}{
+\hyperlink{id50}{
Unknown target name: \textquotedbl{}hyperlink reference without a target\textquotedbl{}.
}}
\DUadmonition[system-message]{
\DUtitle[system-message]{system-message}
-\raisebox{1em}{\hypertarget{id53}{}}
+\raisebox{1em}{\hypertarget{id51}{}}
{\color{red}ERROR/3} in \texttt{functional/input/data/standard.txt}, line~440
-\hyperlink{id54}{
+\hyperlink{id52}{
Duplicate target name, cannot be used as a unique reference: \textquotedbl{}duplicate target names\textquotedbl{}.
}}
Modified: trunk/docutils/test/test_transforms/test_hyperlinks.py
===================================================================
--- trunk/docutils/test/test_transforms/test_hyperlinks.py 2017-03-31 15:47:13 UTC (rev 8054)
+++ trunk/docutils/test/test_transforms/test_hyperlinks.py 2017-04-06 13:10:34 UTC (rev 8055)
@@ -912,7 +912,7 @@
<document source="test data">
<paragraph>
Duplicate manual footnote labels, with reference (
- <problematic ids="id5 id1" refid="id4">
+ <problematic ids="id1" refid="id4">
[1]_
):
<footnote dupnames="1" ids="id2">
@@ -928,7 +928,7 @@
Duplicate explicit target name: "1".
<paragraph>
Footnote.
- <system_message backrefs="id5" ids="id4" level="3" line="1" source="test data" type="ERROR">
+ <system_message backrefs="id1" ids="id4" level="3" line="1" source="test data" type="ERROR">
<paragraph>
Duplicate target name, cannot be used as a unique reference: "1".
"""],
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-04-10 18:41:22
|
Revision: 8056
http://sourceforge.net/p/docutils/code/8056
Author: milde
Date: 2017-04-10 18:41:14 +0000 (Mon, 10 Apr 2017)
Log Message:
-----------
Document and update smartquotes.
Modified Paths:
--------------
trunk/docutils/docutils/utils/smartquotes.py
Added Paths:
-----------
trunk/docutils/docs/user/smartquotes.txt
Added: trunk/docutils/docs/user/smartquotes.txt
===================================================================
--- trunk/docutils/docs/user/smartquotes.txt (rev 0)
+++ trunk/docutils/docs/user/smartquotes.txt 2017-04-10 18:41:14 UTC (rev 8056)
@@ -0,0 +1,465 @@
+=========================
+Smart Quotes for Docutils
+=========================
+
+:Author: Günter Milde,
+ based on SmartyPants by John Gruber, Brad Choate, and Chad Miller
+:Contact: doc...@li...
+:Revision: $Revision$
+:Date: $Date$
+:License: Released under the terms of the `2-Clause BSD license`_
+:Abstract: This document describes the Docutils `smartquotes` module.
+
+.. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
+
+.. contents::
+
+Description
+===========
+
+The `smart_quotes configuration setting`_ triggers the SmartQuotes
+transformation on Text nodes, which converts
+
+- straight quotes (``"`` and ``'``) into "curly" quote characters,
+- dashes (``--`` and ``---``) into en- and em-dash entities, and
+- three consecutive dots (``...`` or ``. . .``) into an ellipsis entity.
+
+This means you can write, edit, and save your documents using plain old
+ASCII---straight quotes, plain dashes, and plain dots---while Docutils
+generates documents with typographical quotes, dashes, and ellipses.
+
+Advantages:
+
+* typing speed (especially when blind-typing),
+* the possibility to change the quoting style of the
+ complete document with just one configuration option, and
+* restriction to 7-bit characters in the source.
+
+However, there are `algorithmic shortcomings`_ for 2 reasons:
+
+* Dual use of the "ASCII-apostrophe" (') as single quote and apostrophe,
+* languages that do not use whitespace around words.
+
+So, please consider also
+`Why You Might Not Want to Use "Smart" Quotes in Your Documents`_.
+
+.. _smart_quotes configuration setting: config.html#smart-quotes
+
+
+Escaping
+========
+
+The `SmartQuotes` transform does not modify characters in literal text
+such as source code, maths, or literal blocks.
+
+If you need literal straight quotes (or plain hyphens and periods) in normal
+text, you can backslash escape the characters to preserve
+ASCII-punctuation. You need two backslashes as one backslash is removed by
+the reStructuredText `escaping mechanism`_.
+
+======== ========= ======== =========
+Escape Character Escape Character
+======== ========= ======== =========
+``\\`` \\ ``\\.`` \\.
+``\\"`` \\" ``\\-`` \\-
+``\\'`` \\' ``\\``` \\`
+======== ========= ======== =========
+
+This is useful, for example, when you want to use straight quotes as
+foot and inch marks: 6\\'2\\" tall; a 17\\" iMac.
+
+.. _escaping mechanism: ../ref/rst/restructuredtext.html#escaping-mechanism
+
+
+Localisation
+============
+
+Quotation marks have a `variety of forms`__ in different languages and
+media.
+
+`Smartquotes` selects quotation marks depending on the language of the current
+block element and the value of the `smart_quotes configuration setting`_.
+
+__ https://en.wikipedia.org/wiki/Quotation_mark#Summary_table
+
+Docutils' `smartquotes` support the following languages:
+
+
+.. class:: language-af
+
+"'Afrikaans' quotes"
+
+.. class:: language-af-x-altquot
+
+"'Afrikaans' alternative quotes"
+
+.. class:: language-ca
+
+"'Catalan' quotes"
+
+.. class:: language-ca-x-altquot
+
+"'Catalan' alternative quotes"
+
+.. class:: language-cs
+
+"'Czech' quotes"
+
+.. class:: language-cs-x-altquot
+
+"'Czech' alternative quotes"
+
+.. class:: language-da
+
+"'Danish' quotes"
+
+.. class:: language-da-x-altquot
+
+"'Danish' alternative quotes"
+
+.. class:: language-de
+
+"'German' quotes"
+
+.. class:: language-de-x-altquot
+
+"'German' alternative quotes"
+
+.. class:: language-de-ch
+
+"'Swiss-German' quotes"
+
+.. class:: language-el
+
+"'Greek' quotes"
+
+.. class:: language-en
+
+"'English' quotes"
+
+.. class:: language-en-uk-x-altquot
+
+"'British' alternative quotes"
+(swaps single and double quotes: ``"`` → ‘ and ``'`` → “)
+
+.. class:: language-eo
+
+"'Esperanto' quotes"
+
+.. class:: language-es
+
+"'Spanish' quotes"
+
+.. class:: language-es-x-altquot
+
+"'Spanish' alternative quotes"
+
+.. class:: language-et
+
+"'Estonian' quotes" (no secondary quote listed in Wikipedia)
+
+.. class:: language-et-x-altquot
+
+"'Estonian' alternative quotes"
+
+.. class:: language-eu
+
+"'Basque' quotes"
+
+.. class:: language-fi
+
+"'Finnish' quotes"
+
+.. class:: language-fi-x-altquot
+
+"'Finnish' alternative quotes"
+
+.. class:: language-fr
+
+"'French' quotes"
+
+.. class:: language-fr-x-altquot
+
+"'French' alternative quotes"
+
+.. class:: language-fr-ch
+
+"'Swiss-French' quotes"
+
+.. class:: language-fr-ch-x-altquot
+
+"'Swiss-French' alternative quotes" (narrow no-break space, see
+http://typoguide.ch/)
+
+.. class:: language-gl
+
+"'Galician' quotes"
+
+.. class:: language-he
+
+"'Hebrew' quotes"
+
+.. class:: language-he-x-altquot
+
+"'Hebrew' alternative quotes"
+
+.. class:: language-hr
+
+"'Croatian' quotes"
+
+.. class:: language-hr-x-altquot
+
+"'Croatian' alternative quotes"
+
+.. class:: language-hsb
+
+"'Upper Sorbian' quotes"
+
+.. class:: language-hsb-x-altquot
+
+"'Upper Sorbian' alternative quotes"
+
+.. class:: language-hu
+
+"'Hungarian' quotes"
+
+.. class:: language-is
+
+"'Icelandic' quotes"
+
+.. class:: language-it
+
+"'Italian' quotes"
+
+.. class:: language-it-ch
+
+"'Swiss-Italian' quotes"
+
+.. class:: language-it-x-altquot
+
+"'Italian' alternative quotes"
+
+.. class:: language-ja
+
+"'Japanese' quotes"
+
+.. class:: language-lt
+
+"'Lithuanian' quotes"
+
+.. class:: language-lv
+
+"'Latvian' quotes"
+
+.. class:: language-nl
+
+"'Dutch' quotes"
+
+.. class:: language-nl-x-altquot
+
+"'Dutch' alternative quotes"
+
+.. # 'nl-x-altquot2': u'””’’',
+
+.. class:: language-pl
+
+"'Polish' quotes"
+
+.. class:: language-pl-x-altquot
+
+"'Polish' alternative quotes"
+
+.. class:: language-pt
+
+"'Portuguese' quotes"
+
+.. class:: language-pt-br
+
+"'Portuguese (Brazil)' quotes"
+
+.. class:: language-ro
+
+"'Romanian' quotes"
+
+.. class:: language-ru
+
+"'Russian' quotes"
+
+.. class:: language-sh
+
+"'Serbo-Croatian' quotes"
+
+.. class:: language-sh-x-altquot
+
+"'Serbo-Croatian' alternative quotes"
+
+.. class:: language-sk
+
+"'Slovak' quotes"
+
+.. class:: language-sk-x-altquot
+
+"'Slovak' alternative quotes"
+
+.. class:: language-sl
+
+"'Slovenian' quotes"
+
+.. class:: language-sl-x-altquot
+
+"'Slovenian' alternative quotes"
+
+.. class:: language-sr
+
+"'Serbian' quotes"
+
+.. class:: language-sr-x-altquot
+
+"'Serbian' alternative quotes"
+
+.. class:: language-sv
+
+"'Swedish' quotes"
+
+.. class:: language-sv-x-altquot
+
+"'Swedish' alternative quotes"
+
+.. class:: language-tr
+
+"'Turkish' quotes"
+
+.. class:: language-tr-x-altquot
+
+"'Turkish' alternative quotes"
+
+.. 'tr-x-altquot2': u'“„‘‚', # antiquated?
+
+.. class:: language-uk
+
+"'Ukrainian' quotes"
+
+.. class:: language-uk-x-altquot
+
+"'Ukrainian' alternative quotes"
+
+.. class:: language-zh-cn
+
+"'Chinese (China)' quotes"
+
+.. class:: language-zh-tw
+
+"'Chinese (Taiwan)' quotes"
+
+Quotes in text blocks in a non-supported language use the document
+language:
+
+.. class:: language-undefined-example
+
+"'Undefined' quotes"
+
+
+Caveats
+=======
+
+Why You Might Not Want to Use "Smart" Quotes in Your Documents
+--------------------------------------------------------------
+
+For one thing, you might not care.
+
+Most normal, mentally stable individuals do not take notice of proper
+typographic punctuation. Many design and typography nerds, however, break
+out in a nasty rash when they encounter, say, a restaurant sign that uses
+a straight apostrophe to spell "Joe's".
+
+If you're the sort of person who just doesn't care, you might well want to
+continue not caring. Using straight quotes -- and sticking to the 7-bit
+ASCII character set in general -- is certainly a simpler way to live.
+
+Even if you *do* care about accurate typography, you still might want to
+think twice before "auto-educating" the quote characters in your documents.
+As there is always a chance that the algorithm gets it wrong, you may
+instead prefer to use the compose key or some other means to insert the
+correct Unicode characters into the source.
+
+
+Algorithmic Shortcomings
+------------------------
+
+The ASCII character (u0027 APOSTROPHE) is used for apostrophe and single
+quotes. If used inside a word, it is converted into an apostrophe:
+
+ .. class:: language-fr
+
+ Il dit : "C'est 'super' !"
+
+At the beginning or end of a word, it cannot be distinguished from a single
+quote by the algorithm.
+
+The `right single quotation mark`_ character -- used to close a secondary
+(inner) quote in English -- is also "the preferred character to use for
+apostrophe" (Unicode_). Therefore, "educating" works as expected for
+apostrophes at the end of a word, e.g.,
+
+ Mr. Hastings' pen; three days' leave; my two cents' worth.
+
+However, when apostrophes are used at the start of leading contractions,
+"educating" will turn the apostrophe into an *opening* secondary quote. In
+English, this is *not* the apostrophe character, e.g., ``'Twas brillig``
+is "miseducated" to
+
+ 'Twas brillig.
+
+In other locales (French, Italian, German, ...), secondary closing quotes
+differ from the apostrophe. A text like::
+
+ .. class:: language-de-CH
+
+ "Er sagt: 'Ich fass' es nicht.'"
+
+becomes
+
+ .. class:: language-de-CH
+
+ "Er sagt: 'Ich fass' es nicht.'"
+
+with a single closing guillemet in place of the apostrophe.
+
+In such cases, it's best to use the recommended apostrophe character (’) in
+the source:
+
+ | ’Twas brillig, and the slithy toves
+ | Did gyre and gimble in the wabe;
+ | All mimsy were the borogoves,
+ | And the mome raths outgrabe.
+
+.. _right single quotation mark:
+ http://www.fileformat.info/info/unicode/char/2019/index.htm
+.. _Unicode: http://www.unicode.org/charts/PDF/U2000.pdf
+
+History
+=======
+
+The smartquotes module is an adaption of "SmartyPants_" to Docutils.
+
+`John Gruber`_ did all of the hard work of writing this software in Perl for
+`Movable Type`_ and almost all of this useful documentation. `Chad Miller`_
+ported it to Python to use with Pyblosxom_.
+
+Portions of the SmartyPants original work are based on Brad Choate's nifty
+MTRegex plug-in. `Brad Choate`_ also contributed a few bits of source code to
+this plug-in. Brad Choate is a fine hacker indeed.
+`Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta
+testing of the original SmartyPants.
+
+Internationalization and adaption to Docutils by Günter Milde.
+
+.. _SmartyPants: http://daringfireball.net/projects/smartypants/
+.. _Pyblosxom: http://pyblosxom.bluesock.org/
+.. _Movable Type: http://www.movabletype.org/
+.. _John Gruber: http://daringfireball.net/
+.. _Chad Miller: http://web.chad.org/
+.. _Brad Choate: http://bradchoate.com/
+.. _Jeremy Hedley: http://antipixel.com/
+.. _Charles Wiltgen: http://playbacktime.com/
+.. _Rael Dornfest: http://raelity.org/
Property changes on: trunk/docutils/docs/user/smartquotes.txt
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision
\ No newline at end of property
Modified: trunk/docutils/docutils/utils/smartquotes.py
===================================================================
--- trunk/docutils/docutils/utils/smartquotes.py 2017-04-06 13:10:34 UTC (rev 8055)
+++ trunk/docutils/docutils/utils/smartquotes.py 2017-04-10 18:41:14 UTC (rev 8056)
@@ -17,26 +17,26 @@
r"""
-========================
-SmartyPants for Docutils
-========================
+=========================
+Smart Quotes for Docutils
+=========================
Synopsis
========
-Smart-quotes for Docutils.
+"SmartyPants" is a free web publishing plug-in for Movable Type, Blosxom, and
+BBEdit that easily translates plain ASCII punctuation characters into "smart"
+typographic punctuation characters.
-The original "SmartyPants" is a free web publishing plug-in for Movable Type,
-Blosxom, and BBEdit that easily translates plain ASCII punctuation characters
-into "smart" typographic punctuation characters.
+``smartquotes.py`` is an adaption of "SmartyPants" to Docutils_.
-`smartypants.py`, endeavours to be a functional port of
-SmartyPants to Python, for use with Pyblosxom_.
+* Using Unicode characters instead of HTML entities for typographic quotes, it
+ works for any output format that supports Unicode.
+* Support `language specific quote characters`__.
-`smartquotes.py` is an adaption of Smartypants to Docutils_. By using Unicode
-characters instead of HTML entities for typographic quotes, it works for any
-output format that supports Unicode.
+__ http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks
+
Authors
=======
@@ -182,69 +182,51 @@
Options
=======
-For Pyblosxom users, the ``smartypants_attributes`` attribute is where you
-specify configuration options.
-
Numeric values are the easiest way to configure SmartyPants' behavior:
-"0"
- Suppress all transformations. (Do nothing.)
-"1"
- Performs default SmartyPants transformations: quotes (including
+:0: Suppress all transformations. (Do nothing.)
+
+:1: Performs default SmartyPants transformations: quotes (including
\`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash)
is used to signify an em-dash; there is no support for en-dashes
-"2"
- Same as smarty_pants="1", except that it uses the old-school typewriter
+:2: Same as smarty_pants="1", except that it uses the old-school typewriter
shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``"
(dash dash dash)
for em-dashes.
-"3"
- Same as smarty_pants="2", but inverts the shorthand for dashes:
+:3: Same as smarty_pants="2", but inverts the shorthand for dashes:
"``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for
en-dashes.
-"-1"
- Stupefy mode. Reverses the SmartyPants transformation process, turning
+:-1: Stupefy mode. Reverses the SmartyPants transformation process, turning
the characters produced by SmartyPants into their ASCII equivalents.
- E.g. "“" is turned into a simple double-quote (\"), "—" is
- turned into two dashes, etc.
+ E.g. the LEFT DOUBLE QUOTATION MARK (“) is turned into a simple
+ double-quote (\"), "—" is turned into two dashes, etc.
The following single-character attribute values can be combined to toggle
individual transformations from within the smarty_pants attribute. For
-example, to educate normal quotes and em-dashes, but not ellipses or
-\`\`backticks'' -style quotes:
-
-E.g. ``py['smartypants_attributes'] = "1"`` is equivalent to
+example, ``py['smartypants_attributes'] = "1"`` is equivalent to
``py['smartypants_attributes'] = "qBde"``.
-"q"
- Educates normal quote characters: (") and (').
+:q: Educates normal quote characters: (") and (').
-"b"
- Educates \`\`backticks'' -style double quotes.
+:b: Educates \`\`backticks'' -style double quotes.
-"B"
- Educates \`\`backticks'' -style double quotes and \`single' quotes.
+:B: Educates \`\`backticks'' -style double quotes and \`single' quotes.
-"d"
- Educates em-dashes.
+:d: Educates em-dashes.
-"D"
- Educates em-dashes and en-dashes, using old-school typewriter shorthand:
+:D: Educates em-dashes and en-dashes, using old-school typewriter shorthand:
(dash dash) for en-dashes, (dash dash dash) for em-dashes.
-"i"
- Educates em-dashes and en-dashes, using inverted old-school typewriter
+:i: Educates em-dashes and en-dashes, using inverted old-school typewriter
shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes.
-"e"
- Educates ellipses.
+:e: Educates ellipses.
-"w"
- Translates any instance of ``"`` into a normal double-quote character.
+:w: Translates any instance of ``"`` into a normal double-quote character.
This should be of no interest to most people, but of particular interest
to anyone who writes their posts using Dreamweaver, as Dreamweaver
inexplicably uses this entity to represent a literal double-quote
@@ -301,9 +283,9 @@
------------------------
One situation in which quotes will get curled the wrong way is when
-apostrophes are used at the start of leading contractions. For example:
+apostrophes are used at the start of leading contractions. For example::
-``'Twas the night before Christmas.``
+ 'Twas the night before Christmas.
In the case above, SmartyPants will turn the apostrophe into an opening
single-quote, when in fact it should be the `right single quotation mark`
@@ -402,13 +384,19 @@
apostrophe = u'’' # "’" RIGHT SINGLE QUOTATION MARK
# quote characters (language-specific, set in __init__())
- # http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks
- # http://de.wikipedia.org/wiki/Anf%C3%BChrungszeichen#Andere_Sprachen
- # https://fr.wikipedia.org/wiki/Guillemet
- # http://typographisme.net/post/Les-espaces-typographiques-et-le-web
- # http://www.btb.termiumplus.gc.ca/tpv2guides/guides/redac/index-fra.html
- # https://en.wikipedia.org/wiki/Hebrew_punctuation#Quotation_marks
- # http://www.tustep.uni-tuebingen.de/bi/bi00/bi001t1-anfuehrung.pdf
+ # [1] http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks
+ # [2] http://de.wikipedia.org/wiki/Anf%C3%BChrungszeichen#Andere_Sprachen
+ # [3] https://fr.wikipedia.org/wiki/Guillemet
+ # [4] http://typographisme.net/post/Les-espaces-typographiques-et-le-web
+ # [5] http://www.btb.termiumplus.gc.ca/tpv2guides/guides/redac/index-fra.html
+ # [6] https://en.wikipedia.org/wiki/Hebrew_punctuation#Quotation_marks
+ # [7] http://www.tustep.uni-tuebingen.de/bi/bi00/bi001t1-anfuehrung.pdf
+ #
+ # TODO: configuration option, e.g.::
+ #
+ # smartquote-locales: nl: „“’’, # apostrophe for ``'s Gravenhage``
+ # nr: se, # alias
+ # fr: « : »:‹ : ›, # :-separated list with NBSPs
quotes = {'af': u'“”‘’',
'af-x-altquot': u'„”‚’',
'ca': u'«»“”',
@@ -415,8 +403,9 @@
'ca-x-altquot': u'“”‘’',
'cs': u'„“‚‘',
'cs-x-altquot': u'»«›‹',
- 'da': u'»«‘’',
+ 'da': u'»«›‹',
'da-x-altquot': u'„“‚‘',
+ # 'da-x-altquot2': u'””’’',
'de': u'„“‚‘',
'de-x-altquot': u'»«›‹',
'de-ch': u'«»‹›',
@@ -436,17 +425,19 @@
'fr-ch': u'«»‹›',
'fr-ch-x-altquot': (u'« ', u' »', u'‹ ', u' ›'), # narrow no-break space, http://typoguide.ch/
'gl': u'«»“”',
- 'he': u'”“»«',
- 'he-x-altquot': u'„”‚’',
- 'hr': u'„”‘’',
+ 'he': u'”“»«', # Hebrew is RTL, test position:
+ 'he-x-altquot': u'„”‚’', # low quotation marks are opening.
+ # 'he-x-altquot': u'“„‘‚', # RTL: low quotation marks opening
+ 'hr': u'„”‘’', # http://hrvatska-tipografija.com/polunavodnici/
'hr-x-altquot': u'»«›‹',
'hsb': u'„“‚‘',
'hsb-x-altquot':u'»«›‹',
'hu': u'„”«»',
+ 'is': u'„“‚‘',
'it': u'«»“”',
'it-ch': u'«»‹›',
'it-x-altquot': u'“”‘’',
- # 'it-x-altquot2': u'“„‘‚', # antiquated?
+ # 'it-x-altquot2': u'“„‘‚', # [7] antiquated?
'ja': u'「」『』',
'lt': u'„“‚‘',
'lv': u'„“‚‘',
@@ -454,23 +445,27 @@
'nl-x-altquot': u'„”‚’',
# 'nl-x-altquot2': u'””’’',
'pl': u'„”«»',
- 'pl-x-altquot': u'«»“”',
+ 'pl-x-altquot': u'«»‚’',
+ # 'pl-x-altquot2': u'„”‚’', # https://pl.wikipedia.org/wiki/Cudzys%C5%82%C3%B3w
'pt': u'«»“”',
'pt-br': u'“”‘’',
'ro': u'„”«»',
'ru': u'«»„“',
- 'sh': u'„”‚’',
+ 'sh': u'„”‚’', # Serbo-Croatian
'sh-x-altquot': u'»«›‹',
- 'sk': u'„“‚‘',
+ 'sk': u'„“‚‘', # Slovak
'sk-x-altquot': u'»«›‹',
+ 'sl': u'„“‚‘', # Slovenian
+ 'sl-x-altquot': u'»«›‹',
+ 'sq': u'«»‹›', # Albanian
+ 'sq-x-altquot': u'“„‘‚',
'sr': u'„”’’',
- 'sl': u'„“‚‘',
- 'sl-x-altquot': u'»«›‹',
+ 'sr-x-altquot': u'»«›‹',
'sv': u'””’’',
'sv-x-altquot': u'»»››',
'tr': u'“”‘’',
'tr-x-altquot': u'«»‹›',
- # 'tr-x-altquot2': u'“„‘‚', # antiquated?
+ # 'tr-x-altquot2': u'“„‘‚', # [7] antiquated?
'uk': u'«»„“',
'uk-x-altquot': u'„“‚‘',
'zh-cn': u'“”‘’',
@@ -921,8 +916,7 @@
print docstring_html
-
- # Unit test output goes out stderr.
+ # Unit test output goes to stderr.
import unittest
sp = smartyPants
@@ -931,17 +925,12 @@
def test_dates(self):
self.assertEqual(sp("1440-80's"), u"1440-80’s")
- self.assertEqual(sp("1440-'80s"), u"1440-‘80s")
- self.assertEqual(sp("1440---'80s"), u"1440–‘80s")
- self.assertEqual(sp("1960s"), "1960s") # no effect.
+ self.assertEqual(sp("1440-'80s"), u"1440-’80s")
+ self.assertEqual(sp("1440---'80s"), u"1440–’80s")
self.assertEqual(sp("1960's"), u"1960’s")
- self.assertEqual(sp("one two '60s"), u"one two ‘60s")
- self.assertEqual(sp("'60s"), u"‘60s")
+ self.assertEqual(sp("one two '60s"), u"one two ’60s")
+ self.assertEqual(sp("'60s"), u"’60s")
- def test_ordinal_numbers(self):
- self.assertEqual(sp("21st century"), "21st century") # no effect.
- self.assertEqual(sp("3rd"), "3rd") # no effect.
-
def test_educated_quotes(self):
self.assertEqual(sp('''"Isn't this fun?"'''), u'“Isn’t this fun?”')
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-04-19 16:45:35
|
Revision: 8058
http://sourceforge.net/p/docutils/code/8058
Author: milde
Date: 2017-04-19 16:45:32 +0000 (Wed, 19 Apr 2017)
Log Message:
-----------
LaTeX: Define required Unicode characters in the preamble.
If the encoding is utf8, we can define missing characters once in the
preamble instead of translating every occurrence into a LaTeX command.
Modified Paths:
--------------
trunk/docutils/docutils/writers/latex2e/__init__.py
trunk/docutils/test/functional/expected/latex_babel.tex
trunk/docutils/test/functional/expected/standalone_rst_latex.tex
trunk/docutils/test/test_writers/test_latex2e.py
Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py 2017-04-15 19:09:59 UTC (rev 8057)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py 2017-04-19 16:45:32 UTC (rev 8058)
@@ -727,19 +727,31 @@
}
# Unicode chars that are not recognized by LaTeX's utf8 encoding
unsupported_unicode = {
- 0x00A0: ur'~', # NO-BREAK SPACE
# TODO: ensure white space also at the beginning of a line?
# 0x00A0: ur'\leavevmode\nobreak\vadjust{}~'
+ 0x2000: ur'\enskip', # EN QUAD
+ 0x2001: ur'\quad', # EM QUAD
+ 0x2002: ur'\enskip', # EN SPACE
+ 0x2003: ur'\quad', # EM SPACE
0x2008: ur'\,', # PUNCTUATION SPACE
+ 0x200b: ur'\hspace{0pt}', # ZERO WIDTH SPACE
+ 0x202F: ur'\,', # NARROW NO-BREAK SPACE
+ # 0x02d8: ur'\\u{ }', # BREVE
0x2011: ur'\hbox{-}', # NON-BREAKING HYPHEN
- 0x202F: ur'\,', # NARROW NO-BREAK SPACE
- 0x21d4: ur'$\Leftrightarrow$',
+ 0x212b: ur'\AA', # ANGSTROM SIGN
+ 0x21d4: ur'\ensuremath{\Leftrightarrow}',
# Docutils footnote symbols:
- 0x2660: ur'$\spadesuit$',
- 0x2663: ur'$\clubsuit$',
+ 0x2660: ur'\ensuremath{\spadesuit}',
+ 0x2663: ur'\ensuremath{\clubsuit}',
+ 0xfb00: ur'ff', # LATIN SMALL LIGATURE FF
+ 0xfb01: ur'fi', # LATIN SMALL LIGATURE FI
+ 0xfb02: ur'fl', # LATIN SMALL LIGATURE FL
+ 0xfb03: ur'ffi', # LATIN SMALL LIGATURE FFI
+ 0xfb04: ur'ffl', # LATIN SMALL LIGATURE FFL
}
# Unicode chars that are recognized by LaTeX's utf8 encoding
utf8_supported_unicode = {
+ 0x00A0: ur'~', # NO-BREAK SPACE
0x00AB: ur'\guillemotleft{}', # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bb: ur'\guillemotright{}', # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x200C: ur'\textcompwordmark{}', # ZERO WIDTH NON-JOINER
@@ -1503,17 +1515,24 @@
table[ord(' ')] = ur'~'
# Unicode replacements for 8-bit tex engines (not required with XeTeX/LuaTeX):
if not self.is_xetex:
- table.update(CharMaps.unsupported_unicode)
if not self.latex_encoding.startswith('utf8'):
+ table.update(CharMaps.unsupported_unicode)
table.update(CharMaps.utf8_supported_unicode)
table.update(CharMaps.textcomp)
table.update(CharMaps.pifont)
# Characters that require a feature/package to render
- if [True for ch in text if ord(ch) in CharMaps.textcomp]:
- self.requirements['textcomp'] = PreambleCmds.textcomp
- if [True for ch in text if ord(ch) in CharMaps.pifont]:
+ for ch in text:
+ cp = ord(ch)
+ if cp in CharMaps.textcomp:
+ self.requirements['textcomp'] = PreambleCmds.textcomp
+ elif cp in CharMaps.pifont:
self.requirements['pifont'] = '\\usepackage{pifont}'
-
+ # preamble-definitions for unsupported Unicode characters
+ elif (self.latex_encoding == 'utf8'
+ and cp in CharMaps.unsupported_unicode):
+ self.requirements['_inputenc'+str(cp)] = (
+ '\\DeclareUnicodeCharacter{%04X}{%s}'
+ % (cp, CharMaps.unsupported_unicode[cp]))
text = text.translate(table)
# Break up input ligatures e.g. '--' to '-{}-'.
Modified: trunk/docutils/test/functional/expected/latex_babel.tex
===================================================================
--- trunk/docutils/test/functional/expected/latex_babel.tex 2017-04-15 19:09:59 UTC (rev 8057)
+++ trunk/docutils/test/functional/expected/latex_babel.tex 2017-04-19 16:45:32 UTC (rev 8058)
@@ -40,17 +40,17 @@
quote (\textquotedbl{}). Problematic is the tilde character (\textasciitilde{}) which is regularely used
for no-break spaces but redefined by some language definition files:
-English: 'an' \textquotedbl{}active\textquotedbl{}-quote, \textasciicircum{}circumflex, and~no-break~spaces
+English: 'an' \textquotedbl{}active\textquotedbl{}-quote, \textasciicircum{}circumflex, and no-break spaces
-\foreignlanguage{basque}{Basque: 'an' \textquotedbl{}active\textquotedbl{}-quote, \textasciicircum{}circumflex, and~no-break~spaces}
+\foreignlanguage{basque}{Basque: 'an' \textquotedbl{}active\textquotedbl{}-quote, \textasciicircum{}circumflex, and no-break spaces}
-\foreignlanguage{esperanto}{Esperanto: 'an' \textquotedbl{}active\textquotedbl{}-quote, \textasciicircum{}circumflex, and~no-break~spaces}
+\foreignlanguage{esperanto}{Esperanto: 'an' \textquotedbl{}active\textquotedbl{}-quote, \textasciicircum{}circumflex, and no-break spaces}
-\foreignlanguage{estonian}{Estonian: 'an' \textquotedbl{}active\textquotedbl{}-quote, \textasciicircum{}circumflex, and~no-break~spaces}
+\foreignlanguage{estonian}{Estonian: 'an' \textquotedbl{}active\textquotedbl{}-quote, \textasciicircum{}circumflex, and no-break spaces}
-\foreignlanguage{galician}{Galician: 'an' \textquotedbl{}active\textquotedbl{}-quote, \textasciicircum{}circumflex, and~no-break~spaces}
+\foreignlanguage{galician}{Galician: 'an' \textquotedbl{}active\textquotedbl{}-quote, \textasciicircum{}circumflex, and no-break spaces}
-\foreignlanguage{ngerman}{German: 'an' \textquotedbl{}active\textquotedbl{}-quote, \textasciicircum{}circumflex, and~no-break~spaces}
+\foreignlanguage{ngerman}{German: 'an' \textquotedbl{}active\textquotedbl{}-quote, \textasciicircum{}circumflex, and no-break spaces}
Spanish: option clash with Galician!
Modified: trunk/docutils/test/functional/expected/standalone_rst_latex.tex
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_latex.tex 2017-04-15 19:09:59 UTC (rev 8057)
+++ trunk/docutils/test/functional/expected/standalone_rst_latex.tex 2017-04-19 16:45:32 UTC (rev 8058)
@@ -4,6 +4,9 @@
\usepackage{ifthen}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
+\DeclareUnicodeCharacter{21D4}{\ensuremath{\Leftrightarrow}}
+\DeclareUnicodeCharacter{2660}{\ensuremath{\spadesuit}}
+\DeclareUnicodeCharacter{2663}{\ensuremath{\clubsuit}}
\usepackage{amsmath}
\usepackage[british,french,ngerman,english]{babel}
% Prevent side-effects if French hyphenation patterns are not loaded:
@@ -289,12 +292,12 @@
\tableofcontents
-\section{1~~~Structural Elements%
+\section{1 Structural Elements%
\label{structural-elements}%
}
-\subsection{1.1~~~Section Title%
+\subsection{1.1 Section Title%
\label{section-title}%
}
\subsubsection*{Section Subtitle}
@@ -304,12 +307,12 @@
\texttt{sectsubtitle-xform} configuration value.
-\subsection{1.2~~~Empty Section%
+\subsection{1.2 Empty Section%
\label{empty-section}%
}
-\subsection{1.3~~~Transitions%
+\subsection{1.3 Transitions%
\label{transitions}%
}
@@ -324,12 +327,12 @@
\DUtransition
-\section{2~~~Body Elements%
+\section{2 Body Elements%
\label{body-elements}%
}
-\subsection{2.1~~~Paragraphs%
+\subsection{2.1 Paragraphs%
\label{paragraphs}%
}
@@ -336,7 +339,7 @@
A paragraph.
-\subsubsection{2.1.1~~~Inline Markup%
+\subsubsection{2.1.1 Inline Markup%
\label{inline-markup}%
}
@@ -375,7 +378,7 @@
live link to PEP 258 here.
-\subsection{2.2~~~Bullet Lists%
+\subsection{2.2 Bullet Lists%
\label{bullet-lists}%
}
@@ -413,7 +416,7 @@
\end{itemize}
-\subsection{2.3~~~Enumerated Lists%
+\subsection{2.3 Enumerated Lists%
\label{enumerated-lists}%
}
@@ -466,7 +469,7 @@
\end{enumerate}
-\subsection{2.4~~~Definition Lists%
+\subsection{2.4 Definition Lists%
\label{definition-lists}%
}
@@ -488,7 +491,7 @@
\end{description}
-\subsection{2.5~~~Field Lists%
+\subsection{2.5 Field Lists%
\label{field-lists}%
}
@@ -511,7 +514,7 @@
\end{DUfieldlist}
-\subsection{2.6~~~Option Lists%
+\subsection{2.6 Option Lists%
\label{option-lists}%
}
@@ -546,7 +549,7 @@
description.
-\subsection{2.7~~~Literal Blocks%
+\subsection{2.7 Literal Blocks%
\label{literal-blocks}%
}
@@ -573,7 +576,7 @@
\end{quote}
-\subsection{2.8~~~Line Blocks%
+\subsection{2.8 Line Blocks%
\label{line-blocks}%
}
@@ -659,7 +662,7 @@
\item[] Durch ihres Rumpfs verengten Schacht
\item[] fließt weißes Mondlicht
\item[] still und heiter
-\item[] auf ~ ihren
+\item[] auf ihren
\item[] Waldweg
\item[] u. s.
\item[] w.
@@ -668,7 +671,7 @@
\end{selectlanguage}
-\subsection{2.9~~~Block Quotes%
+\subsection{2.9 Block Quotes%
\label{block-quotes}%
}
@@ -698,7 +701,7 @@
\end{selectlanguage}
-\subsection{2.10~~~Doctest Blocks%
+\subsection{2.10 Doctest Blocks%
\label{doctest-blocks}%
}
@@ -712,7 +715,7 @@
\end{quote}
-\subsection{2.11~~~Footnotes%
+\subsection{2.11 Footnotes%
\label{footnotes}%
}
%
@@ -755,7 +758,7 @@
}
-\subsection{2.12~~~Citations%
+\subsection{2.12 Citations%
\label{citations}%
}
\begin{figure}[b]\raisebox{1em}{\hypertarget{cit2002}{}}[CIT2002]
@@ -768,7 +771,7 @@
citation.
-\subsection{2.13~~~Targets%
+\subsection{2.13 Targets%
\label{targets}%
\label{another-target}%
}
@@ -792,7 +795,7 @@
error.
-\subsubsection{2.13.1~~~Duplicate Target Names%
+\subsubsection{2.13.1 Duplicate Target Names%
\label{duplicate-target-names}%
}
@@ -801,7 +804,7 @@
explicit targets will generate “warning” (level-2) system messages.
-\subsubsection{2.13.2~~~Duplicate Target Names%
+\subsubsection{2.13.2 Duplicate Target Names%
\label{id21}%
}
@@ -811,7 +814,7 @@
\raisebox{1em}{\hypertarget{id50}{}}\hyperlink{id49}{\textbf{\color{red}`Duplicate Target Names`\_}}), an error is generated.
-\subsection{2.14~~~Directives%
+\subsection{2.14 Directives%
\label{directives}%
}
@@ -821,7 +824,7 @@
\url{http://docutils.sourceforge.net/docs/ref/rst/directives.html}.
-\subsubsection{2.14.1~~~Document Parts%
+\subsubsection{2.14.1 Document Parts%
\label{document-parts}%
}
@@ -830,7 +833,7 @@
document (a document-wide \hyperref[table-of-contents]{table of contents}).
-\subsubsection{2.14.2~~~Images and Figures%
+\subsubsection{2.14.2 Images and Figures%
\label{images-and-figures}%
}
@@ -1117,7 +1120,7 @@
\label{target2}\label{target1}
-\subsubsection{2.14.3~~~Admonitions%
+\subsubsection{2.14.3 Admonitions%
\label{admonitions}%
}
@@ -1191,7 +1194,7 @@
}
-\subsubsection{2.14.4~~~Topics, Sidebars, and Rubrics%
+\subsubsection{2.14.4 Topics, Sidebars, and Rubrics%
\label{topics-sidebars-and-rubrics}%
}
@@ -1231,7 +1234,7 @@
allowed (e.g. inside a directive).
-\subsubsection{2.14.5~~~Target Footnotes%
+\subsubsection{2.14.5 Target Footnotes%
\label{target-footnotes}%
}
%
@@ -1256,7 +1259,7 @@
}
-\subsubsection{2.14.6~~~Replacement Text%
+\subsubsection{2.14.6 Replacement Text%
\label{replacement-text}%
}
@@ -1263,7 +1266,7 @@
I recommend you try \href{http://www.python.org/}{Python, \emph{the} best language around}\DUfootnotemark{id32}{id29}{5}.
-\subsubsection{2.14.7~~~Compound Paragraph%
+\subsubsection{2.14.7 Compound Paragraph%
\label{compound-paragraph}%
}
@@ -1400,7 +1403,7 @@
\end{DUclass}
-\subsubsection{2.14.8~~~Parsed Literal Blocks%
+\subsubsection{2.14.8 Parsed Literal Blocks%
\label{parsed-literal-blocks}%
}
@@ -1418,7 +1421,7 @@
\end{quote}
-\subsubsection{2.14.9~~~Code%
+\subsubsection{2.14.9 Code%
\label{code}%
}
@@ -1477,7 +1480,7 @@
\end{DUclass}
-\subsection{2.15~~~Substitution Definitions%
+\subsection{2.15 Substitution Definitions%
\label{substitution-definitions}%
}
@@ -1486,7 +1489,7 @@
(Substitution definitions are not visible in the HTML source.)
-\subsection{2.16~~~Comments%
+\subsection{2.16 Comments%
\label{comments}%
}
@@ -1503,7 +1506,7 @@
(View the HTML source to see the comment.)
-\subsection{2.17~~~Raw text%
+\subsection{2.17 Raw text%
\label{raw-text}%
}
@@ -1522,7 +1525,7 @@
Fifth test in LaTeX.\\Line two.
-\subsection{2.18~~~Container%
+\subsection{2.18 Container%
\label{container}%
}
@@ -1537,7 +1540,7 @@
% .. include:: data/header_footer.txt
-\subsection{2.19~~~Colspanning tables%
+\subsection{2.19 Colspanning tables%
\label{colspanning-tables}%
}
@@ -1615,7 +1618,7 @@
\end{longtable*}
-\subsection{2.20~~~Rowspanning tables%
+\subsection{2.20 Rowspanning tables%
\label{rowspanning-tables}%
}
@@ -1674,7 +1677,7 @@
\end{longtable*}
-\subsection{2.21~~~Custom Roles%
+\subsection{2.21 Custom Roles%
\label{custom-roles}%
}
@@ -1718,7 +1721,7 @@
\end{itemize}
-\subsection{2.22~~~Mathematics%
+\subsection{2.22 Mathematics%
\label{mathematics}%
}
@@ -1844,7 +1847,7 @@
\end{cases}
\end{equation*}
-\section{3~~~Tests for the LaTeX writer%
+\section{3 Tests for the LaTeX writer%
\label{tests-for-the-latex-writer}%
}
@@ -1852,7 +1855,7 @@
not need to be tested with other writers (e.g. the HTML writer).
-\subsection{3.1~~~Custom Roles in LaTeX%
+\subsection{3.1 Custom Roles in LaTeX%
\label{custom-roles-in-latex}%
}
@@ -1895,7 +1898,7 @@
\end{itemize}
-\subsection{3.2~~~class handling%
+\subsection{3.2 class handling%
\label{class-handling}%
}
@@ -1997,7 +2000,7 @@
\end{DUlineblock}
-\subsection{3.3~~~More Tables%
+\subsection{3.3 More Tables%
\label{more-tables}%
}
@@ -2099,7 +2102,7 @@
% This file is used by the standalone_rst_latex test.
-\subsection{3.4~~~Option lists%
+\subsection{3.4 Option lists%
\label{id23}%
}
@@ -2129,7 +2132,7 @@
\end{description}
-\subsection{3.5~~~Monospaced non-alphanumeric characters%
+\subsection{3.5 Monospaced non-alphanumeric characters%
\label{monospaced-non-alphanumeric-characters}%
}
@@ -2145,7 +2148,7 @@
width as the third line.
-\subsection{3.6~~~Non-ASCII characters%
+\subsection{3.6 Non-ASCII characters%
\label{non-ascii-characters}%
}
@@ -2227,13 +2230,13 @@
\\
\hline
-$\spadesuit$
+♠
&
black spade suit
\\
\hline
-$\clubsuit$
+♣
&
black club suit
\\
@@ -2251,7 +2254,7 @@
\\
\hline
-$\Leftrightarrow$
+⇔
&
left-right double arrow
\\
@@ -2521,7 +2524,7 @@
\item The following line should not be wrapped, because it uses
no-break spaces (\textbackslash{}u00a0):
-X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X~X
+X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X X
\item Line wrapping with/without breakpoints marked by soft hyphens
(\textbackslash{}u00ad):
@@ -2532,7 +2535,7 @@
\end{itemize}
-\subsection{3.7~~~Encoding special chars%
+\subsection{3.7 Encoding special chars%
\label{encoding-special-chars}%
}
@@ -2595,7 +2598,7 @@
\end{quote}
-\subsection{3.8~~~Hyperlinks and -targets%
+\subsection{3.8 Hyperlinks and -targets%
\label{hyperlinks-and-targets}%
}
@@ -2641,7 +2644,7 @@
\hyperref[image-label]{image label}.
-\subsection{3.9~~~External references%
+\subsection{3.9 External references%
\label{external-references}%
}
@@ -2723,22 +2726,22 @@
\end{itemize}
-\subsection{3.10~~~Section titles with \hyperref[inline-markup]{inline markup}%
+\subsection{3.10 Section titles with \hyperref[inline-markup]{inline markup}%
\label{section-titles-with-inline-markup}%
}
-\subsubsection{3.10.1~~~\emph{emphasized}, H\textsubscript{2}O and $x^2$%
+\subsubsection{3.10.1 \emph{emphasized}, H\textsubscript{2}O and $x^2$%
\label{emphasized-h2o-and-x-2}%
}
-\subsubsection{3.10.2~~~Substitutions work%
+\subsubsection{3.10.2 Substitutions work%
\label{substitutions-fail}%
}
-\subsection{3.11~~~Deeply nested sections%
+\subsection{3.11 Deeply nested sections%
\label{deeply-nested-sections}%
}
@@ -2745,7 +2748,7 @@
In LaTeX and HTML,
-\subsubsection{3.11.1~~~Level 3%
+\subsubsection{3.11.1 Level 3%
\label{level-3}%
}
@@ -2752,7 +2755,7 @@
nested sections
-\paragraph{3.11.1.1~~~level 4%
+\paragraph{3.11.1.1 level 4%
\label{level-4}%
}
@@ -2759,7 +2762,7 @@
reach at some level
-\subparagraph{3.11.1.1.1~~~level 5%
+\subparagraph{3.11.1.1.1 level 5%
\label{level-5}%
}
@@ -2766,7 +2769,7 @@
(depending on the document class)
-\DUtitle[sectionVI]{3.11.1.1.1.1~~~level 6%
+\DUtitle[sectionVI]{3.11.1.1.1.1 level 6%
\label{level-6}%
}
@@ -2778,7 +2781,7 @@
% Preface for System Messages:
-\section{4~~~Error Handling%
+\section{4 Error Handling%
\label{error-handling}%
}
Modified: trunk/docutils/test/test_writers/test_latex2e.py
===================================================================
--- trunk/docutils/test/test_writers/test_latex2e.py 2017-04-15 19:09:59 UTC (rev 8057)
+++ trunk/docutils/test/test_writers/test_latex2e.py 2017-04-19 16:45:32 UTC (rev 8058)
@@ -309,7 +309,7 @@
\tableofcontents
-\section{1~~~first section%
+\section{1 first section%
\label{first-section}%
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-04-19 16:47:38
|
Revision: 8059
http://sourceforge.net/p/docutils/code/8059
Author: milde
Date: 2017-04-19 16:47:35 +0000 (Wed, 19 Apr 2017)
Log Message:
-----------
Documentation update.
Modified Paths:
--------------
trunk/docutils/docs/user/config.txt
trunk/docutils/test/data/dependencies.txt
trunk/docutils/test/data/raw.txt
trunk/docutils/test/test_dependencies.py
Modified: trunk/docutils/docs/user/config.txt
===================================================================
--- trunk/docutils/docs/user/config.txt 2017-04-19 16:45:32 UTC (rev 8058)
+++ trunk/docutils/docs/user/config.txt 2017-04-19 16:47:35 UTC (rev 8059)
@@ -678,7 +678,8 @@
smart_quotes
~~~~~~~~~~~~
-Change straight quotation marks to typographic form. `Quote characters`_
+Activate the SmartQuotes_ transform to
+change straight quotation marks to typographic form. `Quote characters`_
are selected according to the language of the current block element (see
language_code_). Also changes consequtive runs of hyphen-minus and full
stops (``---``, ``--``, ``...``) to em-dash, en-dash and ellipsis Unicode
@@ -696,6 +697,7 @@
New in Docutils 0.10.
+.. _SmartQuotes: smartquotes.html
.. _quote characters:
http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks
@@ -786,10 +788,10 @@
(`[restructuredtext parser]`_) are set on by default.
-[python reader]
----------------
+.. [python reader]
+ ---------------
-Not implemented.
+ Not implemented.
[writers]
@@ -954,7 +956,7 @@
:HTML:
Format math in standard HTML enhanced by CSS rules.
Requires the ``math.css`` stylesheet (in the system
- `stylesheet directory <stylesheet_dirs [html4css1 writer]_>`_
+ `stylesheet directory <stylesheet_dirs [html4css1 writer]_>`_)
A `stylesheet_path <stylesheet_path [html4css1 writer]_>`_
can be appended after whitespace, the specified
@@ -975,8 +977,8 @@
__ http://docs.mathjax.org/en/latest/tex.html#supported-latex-commands
Con:
- Requires an Internet connection (or a local MathJax
- installation and configuration).
+ Requires an Internet connection or a local MathJax
+ installation and configuration.
Downloads JavaScript code from a third-party site.
Modified: trunk/docutils/test/data/dependencies.txt
===================================================================
--- trunk/docutils/test/data/dependencies.txt 2017-04-19 16:45:32 UTC (rev 8058)
+++ trunk/docutils/test/data/dependencies.txt 2017-04-19 16:47:35 UTC (rev 8059)
@@ -27,3 +27,11 @@
.. image:: ../docs/user/rst/images/biohazard.png
:scale: 50 %
+
+TODO: Paths in included files should be rewritten relative to the base
+ document.
+
+ * when loading images,
+ * when recording dependencies.
+
+.. include: subdir/dependencies-included.txt
Modified: trunk/docutils/test/data/raw.txt
===================================================================
--- trunk/docutils/test/data/raw.txt 2017-04-19 16:45:32 UTC (rev 8058)
+++ trunk/docutils/test/data/raw.txt 2017-04-19 16:47:35 UTC (rev 8059)
@@ -0,0 +1 @@
+Raw text.
Modified: trunk/docutils/test/test_dependencies.py
===================================================================
--- trunk/docutils/test/test_dependencies.py 2017-04-19 16:45:32 UTC (rev 8058)
+++ trunk/docutils/test/test_dependencies.py 2017-04-19 16:47:35 UTC (rev 8059)
@@ -74,7 +74,7 @@
self.assertEqual(record, expected)
def test_dependencies_latex(self):
- # since 0.9, the latex writer records only really accessed files, too
+ # since 0.9, the latex writer records only really accessed files, too.
# Note: currently, raw input files are read (and hence recorded) while
# parsing even if not used in the chosen output format.
# This should change (see parsers/rst/directives/misc.py).
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-04-24 09:23:10
|
Revision: 8061
http://sourceforge.net/p/docutils/code/8061
Author: milde
Date: 2017-04-24 09:23:07 +0000 (Mon, 24 Apr 2017)
Log Message:
-----------
Fix [ 317 ] Extra space inserted with French smartquotes.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/utils/smartquotes.py
trunk/docutils/test/test_transforms/test_smartquotes.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2017-04-19 20:00:04 UTC (rev 8060)
+++ trunk/docutils/HISTORY.txt 2017-04-24 09:23:07 UTC (rev 8061)
@@ -64,6 +64,7 @@
- New quote definitions for hr, hsb, hu, lv, sh, sl, sr.
- Fix [ 313 ] Differentiate apostrophe from closing single quote
(if possible).
+ - Fix [ 317 ] Extra space inserted with French smartquotes.
* docutils/writers/_html_base.py
Modified: trunk/docutils/docutils/utils/smartquotes.py
===================================================================
--- trunk/docutils/docutils/utils/smartquotes.py 2017-04-19 20:00:04 UTC (rev 8060)
+++ trunk/docutils/docutils/utils/smartquotes.py 2017-04-24 09:23:07 UTC (rev 8061)
@@ -588,7 +588,10 @@
text = educateSingleBackticks(text, language)
if do_quotes:
- text = educateQuotes(prev_token_last_char+text, language)[1:]
+ # Replace plain quotes to prevent converstion to
+ # 2-character sequence in French.
+ context = prev_token_last_char.replace('"',';').replace("'",';')
+ text = educateQuotes(context+text, language)[1:]
if do_stupefy:
text = stupefyEntities(text, language)
Modified: trunk/docutils/test/test_transforms/test_smartquotes.py
===================================================================
--- trunk/docutils/test/test_transforms/test_smartquotes.py 2017-04-19 20:00:04 UTC (rev 8060)
+++ trunk/docutils/test/test_transforms/test_smartquotes.py 2017-04-24 09:23:07 UTC (rev 8061)
@@ -153,6 +153,72 @@
.\
"""],
["""\
+Do not convert context-character at inline-tag boundaries
+(in French, smart quotes expand to two characters).
+
+.. class:: language-fr-ch-x-altquot
+
+ Around "_`targets`", "*emphasized*" or "``literal``" text
+ and links to "targets_".
+
+ Inside *"emphasized"* or other `inline "roles"`:
+ (``"string"``), (``'string'``), *\\"betont\\"*, \\"*betont*".
+
+ Do not drop characters from intra-word inline markup like
+ *re*\ ``Structured``\ *Text*.
+""",
+u"""\
+<document source="test data">
+ <paragraph>
+ Do not convert context-character at inline-tag boundaries
+ (in French, smart quotes expand to two characters).
+ <paragraph classes="language-fr-ch-x-altquot">
+ Around «\u202f
+ <target ids="targets" names="targets">
+ targets
+ \u202f», «\u202f
+ <emphasis>
+ emphasized
+ \u202f» or «\u202f
+ <literal>
+ literal
+ \u202f» text
+ and links to «\u202f
+ <reference name="targets" refname="targets">
+ targets
+ \u202f».
+ <paragraph classes="language-fr-ch-x-altquot">
+ Inside \n\
+ <emphasis>
+ «\u202femphasized\u202f»
+ or other \n\
+ <title_reference>
+ inline «\u202froles\u202f»
+ :
+ (
+ <literal>
+ "string"
+ ), (
+ <literal>
+ 'string'
+ ),
+ <emphasis>
+ «\u202fbetont\u202f»
+ , «\u202f
+ <emphasis>
+ betont
+ \u202f».
+ <paragraph classes="language-fr-ch-x-altquot">
+ Do not drop characters from intra-word inline markup like
+ <emphasis>
+ re
+ <literal>
+ Structured
+ <emphasis>
+ Text
+ .
+"""],
+["""\
.. class:: language-de
German "smart quotes" and 'secondary smart quotes'.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-04-24 22:05:43
|
Revision: 8062
http://sourceforge.net/p/docutils/code/8062
Author: milde
Date: 2017-04-24 22:05:40 +0000 (Mon, 24 Apr 2017)
Log Message:
-----------
smartquotes: Add command line interface for stand-alone use (requires 2.7).
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/utils/smartquotes.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2017-04-24 09:23:07 UTC (rev 8061)
+++ trunk/docutils/HISTORY.txt 2017-04-24 22:05:40 UTC (rev 8062)
@@ -65,6 +65,7 @@
- Fix [ 313 ] Differentiate apostrophe from closing single quote
(if possible).
- Fix [ 317 ] Extra space inserted with French smartquotes.
+ - Add command line interface for stand-alone use (requires 2.7).
* docutils/writers/_html_base.py
Modified: trunk/docutils/docutils/utils/smartquotes.py
===================================================================
--- trunk/docutils/docutils/utils/smartquotes.py 2017-04-24 09:23:07 UTC (rev 8061)
+++ trunk/docutils/docutils/utils/smartquotes.py 2017-04-24 22:05:40 UTC (rev 8062)
@@ -30,9 +30,9 @@
``smartquotes.py`` is an adaption of "SmartyPants" to Docutils_.
-* Using Unicode characters instead of HTML entities for typographic quotes, it
- works for any output format that supports Unicode.
-* Support `language specific quote characters`__.
+* Using Unicode instead of HTML entities for typographic punctuation
+ characters, it works for any output format that supports Unicode.
+* Supports `language specific quote characters`__.
__ http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks
@@ -179,67 +179,7 @@
This is useful, for example, when you want to use straight quotes as
foot and inch marks: 6\\'2\\" tall; a 17\\" iMac.
-Options
-=======
-Numeric values are the easiest way to configure SmartyPants' behavior:
-
-:0: Suppress all transformations. (Do nothing.)
-
-:1: Performs default SmartyPants transformations: quotes (including
- \`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash)
- is used to signify an em-dash; there is no support for en-dashes
-
-:2: Same as smarty_pants="1", except that it uses the old-school typewriter
- shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``"
- (dash dash dash)
- for em-dashes.
-
-:3: Same as smarty_pants="2", but inverts the shorthand for dashes:
- "``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for
- en-dashes.
-
-:-1: Stupefy mode. Reverses the SmartyPants transformation process, turning
- the characters produced by SmartyPants into their ASCII equivalents.
- E.g. the LEFT DOUBLE QUOTATION MARK (“) is turned into a simple
- double-quote (\"), "—" is turned into two dashes, etc.
-
-
-The following single-character attribute values can be combined to toggle
-individual transformations from within the smarty_pants attribute. For
-example, ``py['smartypants_attributes'] = "1"`` is equivalent to
-``py['smartypants_attributes'] = "qBde"``.
-
-:q: Educates normal quote characters: (") and (').
-
-:b: Educates \`\`backticks'' -style double quotes.
-
-:B: Educates \`\`backticks'' -style double quotes and \`single' quotes.
-
-:d: Educates em-dashes.
-
-:D: Educates em-dashes and en-dashes, using old-school typewriter shorthand:
- (dash dash) for en-dashes, (dash dash dash) for em-dashes.
-
-:i: Educates em-dashes and en-dashes, using inverted old-school typewriter
- shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes.
-
-:e: Educates ellipses.
-
-:w: Translates any instance of ``&quot;`` into a normal double-quote character.
- This should be of no interest to most people, but of particular interest
- to anyone who writes their posts using Dreamweaver, as Dreamweaver
- inexplicably uses this entity to represent a literal double-quote
- character. SmartyPants only educates normal quotes, not entities (because
- ordinarily, entities are used for the explicit purpose of representing the
- specific character they represent). The "w" option must be used in
- conjunction with one (or both) of the other quote options ("q" or "b").
- Thus, if you wish to apply all SmartyPants transformations (quotes, en-
- and em-dashes, and ellipses) and also translate ``&quot;`` entities into
- regular quotes so SmartyPants can educate them, you should pass the
- following to the smarty_pants attribute:
-
-
Caveats
=======
@@ -321,6 +261,9 @@
Version History
===============
+1.8: 2017-04-24
+ - Command line front-end.
+
1.7.1: 2017-03-19
- Update and extend language-dependent quotes.
- Differentiate apostrophe from single quote.
@@ -369,10 +312,72 @@
- Initial release
"""
+options = r"""
+Options
+=======
+
+Numeric values are the easiest way to configure SmartyPants' behavior:
+
+:0: Suppress all transformations. (Do nothing.)
+
+:1: Performs default SmartyPants transformations: quotes (including
+ \`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash)
+ is used to signify an em-dash; there is no support for en-dashes
+
+:2: Same as smarty_pants="1", except that it uses the old-school typewriter
+ shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``"
+ (dash dash dash)
+ for em-dashes.
+
+:3: Same as smarty_pants="2", but inverts the shorthand for dashes:
+ "``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for
+ en-dashes.
+
+:-1: Stupefy mode. Reverses the SmartyPants transformation process, turning
+ the characters produced by SmartyPants into their ASCII equivalents.
+ E.g. the LEFT DOUBLE QUOTATION MARK (“) is turned into a simple
+ double-quote (\"), "—" is turned into two dashes, etc.
+
+
+The following single-character attribute values can be combined to toggle
+individual transformations from within the smarty_pants attribute. For
+example, ``"1"`` is equivalent to ``"qBde"``.
+
+:q: Educates normal quote characters: (") and (').
+
+:b: Educates \`\`backticks'' -style double quotes.
+
+:B: Educates \`\`backticks'' -style double quotes and \`single' quotes.
+
+:d: Educates em-dashes.
+
+:D: Educates em-dashes and en-dashes, using old-school typewriter shorthand:
+ (dash dash) for en-dashes, (dash dash dash) for em-dashes.
+
+:i: Educates em-dashes and en-dashes, using inverted old-school typewriter
+ shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes.
+
+:e: Educates ellipses.
+
+:w: Translates any instance of ``&quot;`` into a normal double-quote character.
+ This should be of no interest to most people, but of particular interest
+ to anyone who writes their posts using Dreamweaver, as Dreamweaver
+ inexplicably uses this entity to represent a literal double-quote
+ character. SmartyPants only educates normal quotes, not entities (because
+ ordinarily, entities are used for the explicit purpose of representing the
+ specific character they represent). The "w" option must be used in
+ conjunction with one (or both) of the other quote options ("q" or "b").
+ Thus, if you wish to apply all SmartyPants transformations (quotes, en-
+ and em-dashes, and ellipses) and also translate ``&quot;`` entities into
+ regular quotes so SmartyPants can educate them, you should pass the
+ following to the smarty_pants attribute:
+"""
+
+
default_smartypants_attr = "1"
-import re
+import re, sys
class smartchars(object):
"""Smart quotes and dashes
@@ -514,9 +519,8 @@
do_ellipses = False
do_stupefy = False
- if attr == "0": # Do nothing.
- yield text
- elif attr == "1": # Do everything, turn all options on.
+ # if attr == "0": # pass tokens unchanged (see below).
+ if attr == "1": # Do everything, turn all options on.
do_quotes = True
do_backticks = True
do_dashes = 1
@@ -559,7 +563,7 @@
# skip literal text (math, literal, raw, ...)
if ttype == 'literal':
- prev_token_last_char = text[-1:]
+ prev_token_last_char = text[-1:].replace('"',';').replace("'",';')
yield text
continue
@@ -588,10 +592,7 @@
text = educateSingleBackticks(text, language)
if do_quotes:
- # Replace plain quotes to prevent converstion to
- # 2-character sequence in French.
- context = prev_token_last_char.replace('"',';').replace("'",';')
- text = educateQuotes(context+text, language)[1:]
+ text = educateQuotes(prev_token_last_char+text, language)[1:]
if do_stupefy:
text = stupefyEntities(text, language)
@@ -908,37 +909,80 @@
if __name__ == "__main__":
import locale
-
try:
locale.setlocale(locale.LC_ALL, '')
except:
pass
- from docutils.core import publish_string
- docstring_html = publish_string(__doc__, writer_name='html5')
+ # from docutils.core import publish_string
+ # docstring_html = publish_string(__doc__, writer_name='html5')
+ #
+ # print docstring_html
- print docstring_html
+ import argparse
+ parser = argparse.ArgumentParser(
+ description='Filter stdin making ASCII punctuation "smart".')
+ # parser.add_argument("text", help="text to be acted on")
+ parser.add_argument("-a", "--action", default="1",
+ help="what to do with the input (see --actionhelp)")
+ parser.add_argument("-e", "--encoding", default="utf8",
+ help="text encoding")
+ parser.add_argument("-l", "--language", default="en",
+ help="text language (BCP47 tag)")
+ parser.add_argument("-q", "--alternative-quotes", action="store_true",
+ help="use alternative quote style")
+ parser.add_argument("--doc", action="store_true",
+ help="print documentation")
+ parser.add_argument("--actionhelp", action="store_true",
+ help="list available actions")
+ parser.add_argument("--stylehelp", action="store_true",
+ help="list available quote styles")
+ parser.add_argument("--test", action="store_true",
+ help="perform short self-test")
+ args = parser.parse_args()
- # Unit test output goes to stderr.
- import unittest
- sp = smartyPants
+ if args.doc:
+ print (__doc__)
+ elif args.actionhelp:
+ print options
+ elif args.stylehelp:
+ print
+ print "Available styles (primary open/close, secondary open/close)"
+ print "language tag quotes"
+ print "============ ======"
+ for (key, value) in smartchars.quotes.items():
+ print "%-14s %s" % (key, value)
+ elif args.test:
+ # Unit test output goes to stderr.
+ import unittest
- class TestSmartypantsAllAttributes(unittest.TestCase):
- # the default attribute is "1", which means "all".
+ class TestSmartypantsAllAttributes(unittest.TestCase):
+ # the default attribute is "1", which means "all".
+ def test_dates(self):
+ self.assertEqual(smartyPants("1440-80's"), u"1440-80’s")
+ self.assertEqual(smartyPants("1440-'80s"), u"1440-’80s")
+ self.assertEqual(smartyPants("1440---'80s"), u"1440–’80s")
+ self.assertEqual(smartyPants("1960's"), u"1960’s")
+ self.assertEqual(smartyPants("one two '60s"), u"one two ’60s")
+ self.assertEqual(smartyPants("'60s"), u"’60s")
- def test_dates(self):
- self.assertEqual(sp("1440-80's"), u"1440-80’s")
- self.assertEqual(sp("1440-'80s"), u"1440-’80s")
- self.assertEqual(sp("1440---'80s"), u"1440–’80s")
- self.assertEqual(sp("1960's"), u"1960’s")
- self.assertEqual(sp("one two '60s"), u"one two ’60s")
- self.assertEqual(sp("'60s"), u"’60s")
+ def test_educated_quotes(self):
+ self.assertEqual(smartyPants('"Isn\'t this fun?"'), u'“Isn’t this fun?”')
- def test_educated_quotes(self):
- self.assertEqual(sp('''"Isn't this fun?"'''), u'“Isn’t this fun?”')
+ def test_html_tags(self):
+ text = '<a src="foo">more</a>'
+ self.assertEqual(smartyPants(text), text)
- def test_html_tags(self):
- text = '<a src="foo">more</a>'
- self.assertEqual(sp(text), text)
+ suite = unittest.TestLoader().loadTestsFromTestCase(
+ TestSmartypantsAllAttributes)
+ unittest.TextTestRunner().run(suite)
- unittest.main()
+ else:
+ if args.alternative_quotes:
+ if '-x-altquot' in args.language:
+ args.language = args.language.replace('-x-altquot', '')
+ else:
+ args.language += '-x-altquot'
+ text = sys.stdin.read().decode(args.encoding)
+ print smartyPants(text, attr=args.action,
+ language=args.language).encode(args.encoding)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-05-04 19:19:16
|
Revision: 8066
http://sourceforge.net/p/docutils/code/8066
Author: milde
Date: 2017-05-04 19:19:13 +0000 (Thu, 04 May 2017)
Log Message:
-----------
Minor documentation fixes.
Modified Paths:
--------------
trunk/docutils/RELEASE-NOTES.txt
trunk/docutils/docutils/transforms/universal.py
Modified: trunk/docutils/RELEASE-NOTES.txt
===================================================================
--- trunk/docutils/RELEASE-NOTES.txt 2017-05-04 13:06:23 UTC (rev 8065)
+++ trunk/docutils/RELEASE-NOTES.txt 2017-05-04 19:19:13 UTC (rev 8066)
@@ -22,10 +22,10 @@
Future changes
==============
-* remove the `handle_io_errors` option from io.FileInput/Output.
+* Remove the `handle_io_errors` option from io.FileInput/Output.
Used by Sphinx up to version 1.3.1, fixed in 1.3.2 (Nov 29, 2015).
-* Stepwise drop support for Python < 2.7.
+* Drop support for Python 2.4 and 2.5.
* »Prune« the doctree (no change to the reST input syntax):
Modified: trunk/docutils/docutils/transforms/universal.py
===================================================================
--- trunk/docutils/docutils/transforms/universal.py 2017-05-04 13:06:23 UTC (rev 8065)
+++ trunk/docutils/docutils/transforms/universal.py 2017-05-04 19:19:13 UTC (rev 8066)
@@ -273,7 +273,7 @@
lang = lang.replace('-x-altquot', '')
else:
lang += '-x-altquot'
- # drop subtags missing in quotes:
+ # drop unsupported subtags:
for tag in utils.normalize_language_tag(lang):
if tag in smartquotes.smartchars.quotes:
lang = tag
@@ -286,7 +286,7 @@
lang = ''
# Iterator educating quotes in plain text:
- # '2': set all, using old school en- and em- dash shortcuts
+ # (see "utils/smartquotes.py" for the attribute setting)
teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes),
attr='qDe', language=lang)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-05-04 20:10:06
|
Revision: 8067
http://sourceforge.net/p/docutils/code/8067
Author: milde
Date: 2017-05-04 20:10:03 +0000 (Thu, 04 May 2017)
Log Message:
-----------
unroll problematic refs to their origin in document when reporting errors
Patch by Kirill Smelkov. This is part of the (partially outdated) suite of
patches on https://sourceforge.net/p/docutils/patches/67/.
Modified Paths:
--------------
trunk/docutils/docutils/transforms/references.py
trunk/docutils/test/test_transforms/test_substitutions.py
Modified: trunk/docutils/docutils/transforms/references.py
===================================================================
--- trunk/docutils/docutils/transforms/references.py 2017-05-04 19:19:13 UTC (rev 8066)
+++ trunk/docutils/docutils/transforms/references.py 2017-05-04 20:10:03 UTC (rev 8067)
@@ -710,6 +710,7 @@
raise CircularSubstitutionDefinitionError
else:
nested[nested_name].append(key)
+ nested_ref['ref-origin'] = ref
subreflist.append(nested_ref)
except CircularSubstitutionDefinitionError:
parent = ref.parent
@@ -721,9 +722,13 @@
line=parent.line, base_node=parent)
parent.replace_self(msg)
else:
+ # find original ref substitution which caused this error
+ ref_origin = ref
+ while ref_origin.hasattr('ref-origin'):
+ ref_origin = ref_origin['ref-origin']
msg = self.document.reporter.error(
- 'Circular substitution definition referenced: "%s".'
- % refname, base_node=ref)
+ 'Circular substitution definition referenced: '
+ '"%s".' % refname, base_node=ref_origin)
msgid = self.document.set_id(msg)
prb = nodes.problematic(
ref.rawsource, ref.rawsource, refid=msgid)
Modified: trunk/docutils/test/test_transforms/test_substitutions.py
===================================================================
--- trunk/docutils/test/test_transforms/test_substitutions.py 2017-05-04 19:19:13 UTC (rev 8066)
+++ trunk/docutils/test/test_transforms/test_substitutions.py 2017-05-04 20:10:03 UTC (rev 8067)
@@ -223,7 +223,7 @@
<system_message backrefs="id6" ids="id5" level="3" line="5" source="test data" type="ERROR">
<paragraph>
Circular substitution definition referenced: "sub".
- <system_message backrefs="id8" ids="id7" level="3" source="test data" type="ERROR">
+ <system_message backrefs="id8" ids="id7" level="3" line="5" source="test data" type="ERROR">
<paragraph>
Circular substitution definition referenced: "Sub".
"""],
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2017-05-08 22:10:41
|
Revision: 8068
http://sourceforge.net/p/docutils/code/8068
Author: milde
Date: 2017-05-08 22:10:39 +0000 (Mon, 08 May 2017)
Log Message:
-----------
Add "smartquotes-locales" setting.
Modified Paths:
--------------
trunk/docutils/docs/user/config.txt
trunk/docutils/docs/user/smartquotes.txt
trunk/docutils/docutils/frontend.py
trunk/docutils/docutils/parsers/rst/__init__.py
trunk/docutils/docutils/transforms/universal.py
trunk/docutils/docutils/utils/smartquotes.py
trunk/docutils/test/test_settings.py
trunk/docutils/test/test_transforms/test_smartquotes.py
Modified: trunk/docutils/docs/user/config.txt
===================================================================
--- trunk/docutils/docs/user/config.txt 2017-05-04 20:10:03 UTC (rev 8067)
+++ trunk/docutils/docs/user/config.txt 2017-05-08 22:10:39 UTC (rev 8068)
@@ -681,10 +681,12 @@
Activate the SmartQuotes_ transform to
change straight quotation marks to typographic form. `Quote characters`_
are selected according to the language of the current block element (see
-language_code_). Also changes consequtive runs of hyphen-minus and full
-stops (``---``, ``--``, ``...``) to em-dash, en-dash and ellipsis Unicode
-characters respectively.
+language_code_, smartquotes_locales_, and the `pre-defined quote sets`__).
+Also changes consecutive runs of hyphen-minus and full stops (``---``,
+``--``, ``...``) to em-dash, en-dash and ellipsis Unicode characters
+respectively.
+
Supported values:
booleans_ (yes/no)
@@ -698,9 +700,33 @@
New in Docutils 0.10.
.. _SmartQuotes: smartquotes.html
+__ smartquotes.html#localisation
.. _quote characters:
http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks
+
+smartquotes_locales
+~~~~~~~~~~~~~~~~~~~
+
+Typographical quotes used by the SmartQuotes_ transform.
+
+A comma-separated list_ with language tag and a set of four quotes (primary
+open/close, secondary open/close). (If more than one
+character shall be used for a quote (e.g. padding in French quotes), a
+colon-separated list may be used.)
+
+Example:
+ Ensure a correct leading apostrophe in ``'s Gravenhage`` in Dutch (at the
+ cost of incorrect opening single quotes) and set French quotes to double
+ and single guillemets with inner padding::
+
+ smartquotes-locales: nl: „”’’,
+ fr: « : »:‹ : ›
+
+Default: None. Option: ``--smartquotes-locales``.
+
+New in Docutils 0.14.
+
syntax_highlight
~~~~~~~~~~~~~~~~
Modified: trunk/docutils/docs/user/smartquotes.txt
===================================================================
--- trunk/docutils/docs/user/smartquotes.txt 2017-05-04 20:10:03 UTC (rev 8067)
+++ trunk/docutils/docs/user/smartquotes.txt 2017-05-08 22:10:39 UTC (rev 8068)
@@ -17,7 +17,7 @@
Description
===========
-The `smart_quotes configuration setting`_ triggers the SmartQuotes
+The `"smart_quotes" configuration setting`_ triggers the SmartQuotes
transformation on Text nodes that includes the following steps:
- Straight quotes (``"`` and ``'``) into "curly" quote characters
@@ -30,7 +30,7 @@
Advantages:
-* typing speed (especiall when blind-typing),
+* typing speed (especially when blind-typing),
* the possibility to change the quoting style of the
complete document with just one configuration option, and
* restriction to 7-bit characters in the source.
@@ -43,7 +43,8 @@
So, please consider also
`Why You Might Not Want to Use "Smart" Quotes in Your Documents`_.
-.. _smart_quotes configuration setting: config.html#smart-quotes
+.. _"smart_quotes" configuration setting:
+.. _"smart_quotes" setting: config.html#smart-quotes
Escaping
@@ -77,288 +78,303 @@
Quotation marks have a `variety of forms`__ in different languages and
media.
-`Smartquotes` selects quotation marks depending on the language of the current
-block element and the value of the `smart_quotes configuration setting`_.
-
__ https://en.wikipedia.org/wiki/Quotation_mark#Summary_table
-Docutils' `smartquotes` support the following languages:
+`SmartQuotes` inserts quotation marks depending on the language of the
+current block element and the value of the `"smart_quotes" setting`_.\
+[#x-altquot]_
+There is built-in support for the following languages:\ [#smartquotes-locales]_
+:af: .. class:: language-af
-.. class:: language-af
+ "'Afrikaans' quotes"
-"'Afrikaans' quotes"
+:af-x-altquot: .. class:: language-af-x-altquot
-.. class:: language-af-x-altquot
+ "'Afrikaans' alternative quotes"
-"'Afrikaans' alternative quotes"
+:ca: .. class:: language-ca
-.. class:: language-ca
+ "'Catalan' quotes"
-"'Catalan' quotes"
+:ca-x-altquot: .. class:: language-ca-x-altquot
-.. class:: language-ca-x-altquot
+ "'Catalan' alternative quotes"
-"'Catalan' alternative quotes"
+:cs: .. class:: language-cs
-.. class:: language-cs
+ "'Czech' quotes"
-"'Czech' quotes"
+:cs-x-altquot: .. class:: language-cs-x-altquot
-.. class:: language-cs-x-altquot
+ "'Czech' alternative quotes"
-"'Czech' alternative quotes"
+:da: .. class:: language-da
-.. class:: language-da
+ "'Danish' quotes"
-"'Danish' quotes"
+:da-x-altquot: .. class:: language-da-x-altquot
-.. class:: language-da-x-altquot
+ "'Danish' alternative quotes"
-"'Danish' alternative quotes"
+:de: .. class:: language-de
-.. class:: language-de
+ "'German' quotes"
-"'German' quotes"
+:de-x-altquot: .. class:: language-de-x-altquot
-.. class:: language-de-x-altquot
+ "'German' alternative quotes"
-"'German' alternative quotes"
+:de-ch: .. class:: language-de-ch
-.. class:: language-de-ch
+ "'Swiss-German' quotes"
-"'Swiss-German' quotes"
+:el: .. class:: language-el
-.. class:: language-el
+ "'Greek' quotes"
-"'Greek' quotes"
+:en: .. class:: language-en
-.. class:: language-en
+ "'English' quotes"
-"'English' quotes"
+:en-uk-x-altquot: .. class:: language-en-uk-x-altquot
-.. class:: language-en-uk-x-altquot
+ "'British' alternative quotes"
+ (swaps single and double quotes: ``"`` → ‘ and ``'`` → “)
-"'British' alternative quotes"
-(swaps single and double quotes: ``"`` → ‘ and ``'`` → “)
+:eo: .. class:: language-eo
-.. class:: language-eo
+ "'Esperanto' quotes"
-"'Esperanto' quotes"
+:es: .. class:: language-es
-.. class:: language-es
+ "'Spanish' quotes"
-"'Spanish' quotes"
+:es-x-altquot: .. class:: language-es-x-altquot
-.. class:: language-es-x-altquot
+ "'Spanish' alternative quotes"
-"'Spanish' alternative quotes"
+:et: .. class:: language-et
-.. class:: language-et
+ "'Estonian' quotes" (no secondary quote listed in Wikipedia)
-"'Estonian' quotes" (no secondary quote listed in Wikipedia)
+:et-x-altquot: .. class:: language-et-x-altquot
-.. class:: language-et-x-altquot
+ "'Estonian' alternative quotes"
-"'Estonian' alternative quotes"
+:eu: .. class:: language-eu
-.. class:: language-eu
+ "'Basque' quotes"
-"'Basque' quotes"
+:fi: .. class:: language-fi
-.. class:: language-fi
+ "'Finnish' quotes"
-"'Finnish' quotes"
+:fi-x-altquot: .. class:: language-fi-x-altquot
-.. class:: language-fi-x-altquot
+ "'Finnish' alternative quotes"
-"'Finnish' alternative quotes"
+:fr: .. class:: language-fr
-.. class:: language-fr
+ "'French' quotes"
-"'French' quotes"
+:fr-x-altquot: .. class:: language-fr-x-altquot
-.. class:: language-fr-x-altquot
+ "'French' alternative quotes"
-"'French' alternative quotes"
+:fr-ch: .. class:: language-fr-ch
-.. class:: language-fr-ch
+ "'Swiss-French' quotes"
-"'Swiss-French' quotes"
+:fr-ch-x-altquot: .. class:: language-fr-ch-x-altquot
-.. class:: language-fr-ch-x-altquot
+ "'Swiss-French' alternative quotes" (narrow no-break space, see
+ http://typoguide.ch/)
-"'Swiss-French' alternative quotes" (narrow no-break space, see
-http://typoguide.ch/)
+:gl: .. class:: language-gl
-.. class:: language-gl
+ "'Galician' quotes"
-"'Galician' quotes"
+:he: .. class:: language-he
-.. class:: language-he
+ "'Hebrew' quotes"
-"'Hebrew' quotes"
+:he-x-altquot: .. class:: language-he-x-altquot
-.. class:: language-he-x-altquot
+ "'Hebrew' alternative quotes"
-"'Hebrew' alternative quotes"
+:hr: .. class:: language-hr
-.. class:: language-hr
+ "'Croatian' quotes"
-"'Croatian' quotes"
+:hr-x-altquot: .. class:: language-hr-x-altquot
-.. class:: language-hr-x-altquot
+ "'Croatian' alternative quotes"
-"'Croatian' alternative quotes"
+:hsb: .. class:: language-hsb
-.. class:: language-hsb
+ "'Upper Sorbian' quotes"
-"'Upper Sorbian' quotes"
+:hsb-x-altquot: .. class:: language-hsb-x-altquot
-.. class:: language-hsb-x-altquot
+ "'Upper Sorbian' alternative quotes"
-"'Upper Sorbian' alternative quotes"
+:hu: .. class:: language-hu
-.. class:: language-hu
+ "'Hungarian' quotes"
-"'Hungarian' quotes"
+:is: .. class:: language-is
-.. class:: language-is
+ "'Icelandic' quotes"
-"'Icelandic' quotes"
+:it: .. class:: language-it
-.. class:: language-it
+ "'Italian' quotes"
-"'Italian' quotes"
+:it-ch: .. class:: language-it-ch
-.. class:: language-it-ch
+ "'Swiss-Italian' quotes"
-"'Swiss-Italian' quotes"
+:it-x-altquot: .. class:: language-it-x-altquot
-.. class:: language-it-x-altquot
+ "'Italian' alternative quotes"
-"'Italian' alternative quotes"
+:ja: .. class:: language-ja
-.. class:: language-ja
+ "'Japanese' quotes"
-"'Japanese' quotes"
+:lt: .. class:: language-lt
-.. class:: language-lt
+ "'Lithuanian' quotes"
-"'Lithuanian' quotes"
+:lv: .. class:: language-lv
-.. class:: language-lv
+ "'Latvian' quotes"
-"'Latvian' quotes"
+:nl: .. class:: language-nl
-.. class:: language-nl
+ "'Dutch' quotes"
-"'Dutch' quotes"
+:nl-x-altquot: .. class:: language-nl-x-altquot
-.. class:: language-nl-x-altquot
+ "'Dutch' alternative quotes"
-"'Dutch' alternative quotes"
-
.. # 'nl-x-altquot2': u'””’’',
-.. class:: language-pl
+:pl: .. class:: language-pl
-"'Polish' quotes"
+ "'Polish' quotes"
-.. class:: language-pl-x-altquot
+:pl-x-altquot: .. class:: language-pl-x-altquot
-"'Polish' alternative quotes"
+ "'Polish' alternative quotes"
-.. class:: language-pt
+:pt: .. class:: language-pt
-"'Portuguese' quotes"
+ "'Portuguese' quotes"
-.. class:: language-pt-br
+:pt-br: .. class:: language-pt-br
-"'Portuguese (Brazil)' quotes"
+ "'Portuguese (Brazil)' quotes"
-.. class:: language-ro
+:ro: .. class:: language-ro
-"'Romanian' quotes"
+ "'Romanian' quotes"
-.. class:: language-ru
+:ru: .. class:: language-ru
-"'Russian' quotes"
+ "'Russian' quotes"
-.. class:: language-sh
+:sh: .. class:: language-sh
-"'Serbo-Croatian' quotes"
+ "'Serbo-Croatian' quotes"
-.. class:: language-sh-x-altquot
+:sh-x-altquot: .. class:: language-sh-x-altquot
-"'Serbo-Croatian' alternative quotes"
+ "'Serbo-Croatian' alternative quotes"
-.. class:: language-sk
+:sk: .. class:: language-sk
-"'Slovak' quotes"
+ "'Slovak' quotes"
-.. class:: language-sk-x-altquot
+:sk-x-altquot: .. class:: language-sk-x-altquot
-"'Slovak' alternative quotes"
+ "'Slovak' alternative quotes"
-.. class:: language-sl
+:sl: .. class:: language-sl
-"'Slovenian' quotes"
+ "'Slovenian' quotes"
-.. class:: language-sl-x-altquot
+:sl-x-altquot: .. class:: language-sl-x-altquot
-"'Slovenian' alternative quotes"
+ "'Slovenian' alternative quotes"
-.. class:: language-sr
+:sr: .. class:: language-sr
-"'Serbian' quotes"
+ "'Serbian' quotes"
-.. class:: language-sr-x-altquot
+:sr-x-altquot: .. class:: language-sr-x-altquot
-"'Serbian' alternative quotes"
+ "'Serbian' alternative quotes"
-.. class:: language-sv
+:sv: .. class:: language-sv
-"'Swedish' quotes"
+ "'Swedish' quotes"
-.. class:: language-sv-x-altquot
+:sv-x-altquot: .. class:: language-sv-x-altquot
-"'Swedish' alternative quotes"
+ "'Swedish' alternative quotes"
-.. class:: language-tr
+:tr: .. class:: language-tr
-"'Turkish' quotes"
+ "'Turkish' quotes"
-.. class:: language-tr-x-altquot
+:tr-x-altquot: .. class:: language-tr-x-altquot
-"'Turkish' alternative quotes"
+ "'Turkish' alternative quotes"
.. 'tr-x-altquot2': u'“„‘‚', # antiquated?
-.. class:: language-uk
+:uk: .. class:: language-uk
-"'Ukrainian' quotes"
+ "'Ukrainian' quotes"
-.. class:: language-uk-x-altquot
+:uk-x-altquot: .. class:: language-uk-x-altquot
-"'Ukrainian' alternative quotes"
+ "'Ukrainian' alternative quotes"
-.. class:: language-zh-cn
+:zh-cn: .. class:: language-zh-cn
-"'Chinese (China)' quotes"
+ "'Chinese (China)' quotes"
-.. class:: language-zh-tw
+:zh-tw: .. class:: language-zh-tw
-"'Chinese (Taiwan)' quotes"
+ "'Chinese (Taiwan)' quotes"
-Quotes in text blocks in a non-supported language use the document
-language:
+Quotes in text blocks in a non-configured language are kept as plain quotes:
-.. class:: langugage-undefined-example
+:undefined: .. class:: language-undefined-example
-"'Undefined' quotes"
+ "'Undefined' quotes"
+.. [#x-altquot] Tags with the non-standard extension ``-x-altquot`` define
+ the quote set used with the `"smart_quotes" setting`_ value ``"alt"``.
+.. [#smartquotes-locales] The definitions for language-dependent
+ typographical quotes can be extended or overwritten using the
+ `"smartquotes_locales" setting`_.
+
+ The following example ensures a correct leading apostrophe in ``'s
+ Gravenhage`` (at the cost of incorrect leading single quotes) in Dutch
+ and sets French quotes to double and single guillemets with inner
+ spacing::
+
+ smartquotes-locales: nl: „”’’
+ fr: « : »:‹ : ›
+
+.. _"smartquotes_locales" setting: config.html#smartquotes-locales
+
+
Caveats
=======
@@ -452,7 +468,7 @@
`Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta
testing of the original SmartyPants.
-Internationalization and adaption to Docutils by Günter Milde.
+Internationalisation and adaption to Docutils by Günter Milde.
.. _SmartyPants: http://daringfireball.net/projects/smartypants/
.. _Pyblosxom: http://pyblosxom.bluesock.org/
Modified: trunk/docutils/docutils/frontend.py
===================================================================
--- trunk/docutils/docutils/frontend.py 2017-05-04 20:10:03 UTC (rev 8067)
+++ trunk/docutils/docutils/frontend.py 2017-05-08 22:10:39 UTC (rev 8068)
@@ -40,7 +40,8 @@
import docutils
import docutils.utils
import docutils.nodes
-from docutils.utils.error_reporting import locale_encoding, ErrorOutput, ErrorString
+from docutils.utils.error_reporting import (locale_encoding, SafeString,
+ ErrorOutput, ErrorString)
def store_multiple(option, opt, value, parser, *args, **kwargs):
@@ -205,10 +206,45 @@
for cls in value:
normalized = docutils.nodes.make_id(cls)
if cls != normalized:
- raise ValueError('invalid class value %r (perhaps %r?)'
+ raise ValueError('Invalid class value %r (perhaps %r?)'
% (cls, normalized))
return value
+def validate_smartquotes_locales(setting, value, option_parser,
+ config_parser=None, config_section=None):
+ """Check/normalize a comma separated list of smart quote definitions.
+
+ Return a list of (language-tag, quotes) string tuples."""
+
+ # value is a comma separated string list:
+ value = validate_comma_separated_list(setting, value, option_parser,
+ config_parser, config_section)
+ # validate list elements
+ lc_quotes = []
+ for item in value:
+ try:
+ lang, quotes = item.split(':', 1)
+ except AttributeError:
+ # this function is called for every option added to `value`
+ # -> ignore if already a tuple:
+ lc_quotes.append(item)
+ continue
+ except ValueError:
+ raise ValueError(u'Invalid value "%s".'
+ ' Format is "<language>:<quotes>".'
+ % item.encode('ascii', 'backslashreplace'))
+ # parse colon separated string list:
+ quotes = quotes.strip()
+ multichar_quotes = quotes.split(':')
+ if len(multichar_quotes) == 4:
+ quotes = multichar_quotes
+ elif len(quotes) != 4:
+ raise ValueError('Invalid value "%s". Please specify 4 quotes\n'
+ ' (primary open/close; secondary open/close).'
+ % item.encode('ascii', 'backslashreplace'))
+ lc_quotes.append((lang,quotes))
+ return lc_quotes
+
def make_paths_absolute(pathdict, keys, base_path=None):
"""
Interpret filesystem path settings relative to the `base_path` given.
@@ -568,7 +604,7 @@
try:
config_settings = self.get_standard_config_settings()
except ValueError, error:
- self.error(error)
+ self.error(SafeString(error))
self.set_defaults_from_dict(config_settings.__dict__)
def populate_from_components(self, components):
Modified: trunk/docutils/docutils/parsers/rst/__init__.py
===================================================================
--- trunk/docutils/docutils/parsers/rst/__init__.py 2017-05-04 20:10:03 UTC (rev 8067)
+++ trunk/docutils/docutils/parsers/rst/__init__.py 2017-05-08 22:10:39 UTC (rev 8068)
@@ -141,7 +141,13 @@
('Change straight quotation marks to typographic form: '
'one of "yes", "no", "alt[ernative]" (default "no").',
['--smart-quotes'],
- {'default': False, 'validator': frontend.validate_ternary}),
+ {'default': False, 'metavar': '<yes/no/alt>',
+ 'validator': frontend.validate_ternary}),
+ ('Characters to use as "smart quotes" for <language>. ',
+ ['--smartquotes-locales'],
+ {'metavar': '<language:quotes[,language:quotes,...]>',
+ 'action': 'append',
+ 'validator': frontend.validate_smartquotes_locales}),
('Inline markup recognized at word boundaries only '
'(adjacent to punctuation or whitespace). '
'Force character-level inline markup recognition with '
Modified: trunk/docutils/docutils/transforms/universal.py
===================================================================
--- trunk/docutils/docutils/transforms/universal.py 2017-05-04 20:10:03 UTC (rev 8067)
+++ trunk/docutils/docutils/transforms/universal.py 2017-05-08 22:10:39 UTC (rev 8068)
@@ -249,6 +249,9 @@
# print repr(alternative)
document_language = self.document.settings.language_code
+ lc_smartquotes = self.document.settings.smartquotes_locales
+ if lc_smartquotes:
+ smartquotes.smartchars.quotes.update(dict(lc_smartquotes))
# "Educate" quotes in normal text. Handle each block of text
# (TextElement node) as a unit to keep context around inline nodes:
Modified: trunk/docutils/docutils/utils/smartquotes.py
===================================================================
--- trunk/docutils/docutils/utils/smartquotes.py 2017-05-04 20:10:03 UTC (rev 8067)
+++ trunk/docutils/docutils/utils/smartquotes.py 2017-05-08 22:10:39 UTC (rev 8068)
@@ -592,7 +592,7 @@
text = educateSingleBackticks(text, language)
if do_quotes:
- # Replace plain quotes to prevent converstion to
+ # Replace plain quotes in context to prevent conversion to
# 2-character sequence in French.
context = prev_token_last_char.replace('"',';').replace("'",';')
text = educateQuotes(context+text, language)[1:]
Modified: trunk/docutils/test/test_settings.py
===================================================================
--- trunk/docutils/test/test_settings.py 2017-05-04 20:10:03 UTC (rev 8067)
+++ trunk/docutils/test/test_settings.py 2017-05-08 22:10:39 UTC (rev 8068)
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
# $Id$
# Author: David Goodger <go...@py...>
@@ -176,6 +177,10 @@
pathdict = {'foo': 'hallo', 'ham': u'h\xE4m', 'spam': u'spam'}
keys = ['foo', 'ham']
+ def setUp(self):
+ self.option_parser = frontend.OptionParser(
+ components=(rst.Parser,), read_config_files=None)
+
def test_make_paths_absolute(self):
pathdict = self.pathdict.copy()
frontend.make_paths_absolute(pathdict, self.keys, base_path='base')
@@ -195,6 +200,33 @@
# not touched, because key not in keys:
self.assertEqual(pathdict['spam'], u'spam')
+ boolean_settings = (
+ (True, True ),
+ ('1', True ),
+ (u'on', True ),
+ ('yes', True ),
+ (u'true', True ),
+ (u'0', False ),
+ ('off', False ),
+ (u'no', False ),
+ ('false', False ),
+ )
+ def test_validate_boolean(self):
+ for t in self.boolean_settings:
+ self.assertEqual(
+ frontend.validate_boolean(None, t[0], self.option_parser),
+ t[1])
+
+ def test_validate_ternary(self):
+ tests = (
+ ('500V', '500V'),
+ (u'parrot', u'parrot'),
+ )
+ for t in self.boolean_settings + tests:
+ self.assertEqual(
+ frontend.validate_ternary(None, t[0], self.option_parser),
+ t[1])
+
def test_validate_colon_separated_string_list(self):
tests = (
(u'a', ['a',] ),
@@ -209,7 +241,6 @@
frontend.validate_colon_separated_string_list(None, t[0], None),
t[1])
-
def test_validate_comma_separated_list(self):
tests = (
(u'a', ['a',] ),
@@ -225,5 +256,33 @@
frontend.validate_comma_separated_list(None, t[0], None),
t[1])
+ def test_validate_url_trailing_slash(self):
+ tests = (
+ ('', './' ),
+ (None, './' ),
+ (u'http://example.org', u'http://example.org/' ),
+ ('http://example.org/', 'http://example.org/' ),
+ )
+ for t in tests:
+ self.assertEqual(
+ frontend.validate_url_trailing_slash(None, t[0], None),
+ t[1])
+
+ def test_validate_smartquotes_locales(self):
+ tests = (
+ ('en:ssvv', [('en', 'ssvv')]),
+ (u'sd:«»°°', [(u'sd', u'«»°°')]),
+ ([('sd', u'«»°°'), u'ds:°°«»'], [('sd', u'«»°°'),
+ ('ds', u'°°«»')]),
+ (u'frs:« : »:((:))', [(u'frs', [u'« ', u' »',
+ u'((', u'))'])]),
+ )
+ for t in tests:
+ self.assertEqual(
+ frontend.validate_smartquotes_locales(None, t[0], None),
+ t[1])
+
+
+
if __name__ == '__main__':
unittest.main()
Modified: trunk/docutils/test/test_transforms/test_smartquotes.py
===================================================================
--- trunk/docutils/test/test_transforms/test_smartquotes.py 2017-05-04 20:10:03 UTC (rev 8067)
+++ trunk/docutils/test/test_transforms/test_smartquotes.py 2017-05-08 22:10:39 UTC (rev 8068)
@@ -32,6 +32,9 @@
s.generateTests(totest_de)
settings['smart_quotes'] = 'alternative'
s.generateTests(totest_de_alt)
+ settings['smart_quotes'] = True
+ settings['smartquotes_locales'] = [('de', u'«»()'), ('nl', u'„”’’')]
+ s.generateTests(totest_locales)
return s
@@ -38,6 +41,7 @@
totest = {}
totest_de = {}
totest_de_alt = {}
+totest_locales = {}
totest['transitions'] = ((SmartQuotes,), [
["""\
@@ -299,6 +303,24 @@
"""],
])
+totest_locales['transitions'] = ((SmartQuotes,), [
+["""\
+German "smart quotes" and 'secondary smart quotes'.
+
+.. class:: language-nl
+
+Dutch "smart quotes" and 's Gravenhage (leading apostrophe).
+""",
+u"""\
+<document source="test data">
+ <paragraph>
+ German «smart quotes» and (secondary smart quotes).
+ <paragraph classes="language-nl">
+ Dutch „smart quotes” and ’s Gravenhage (leading apostrophe).
+"""],
+])
+
+
if __name__ == '__main__':
import unittest
unittest.main(defaultTest='suite')
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|