|
From: <go...@us...> - 2016-12-16 03:45:10
|
Revision: 7998
http://sourceforge.net/p/docutils/code/7998
Author: goodger
Date: 2016-12-16 03:45:07 +0000 (Fri, 16 Dec 2016)
Log Message:
-----------
Added functionality (plus tests & docs): escaped whitespace in URI contexts.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/RELEASE-NOTES.txt
trunk/docutils/docs/ref/rst/restructuredtext.txt
trunk/docutils/docutils/parsers/rst/states.py
trunk/docutils/docutils/utils/__init__.py
trunk/docutils/test/test_parsers/test_rst/test_inline_markup.py
trunk/docutils/test/test_parsers/test_rst/test_targets.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/HISTORY.txt 2016-12-16 03:45:07 UTC (rev 7998)
@@ -13,11 +13,28 @@
.. contents::
+
Changes Since 0.13.1
====================
+* docs/ref/rst/restructuredtext.txt:
+
+ - Added documentation for escaped whitespace in URI contexts.
+
+* docutils/parsers/rst/states.py:
+
+ - Added functionality: escaped whitespace in URI contexts.
+
+* docutils/utils/__init__.py:
+
+ - Added ``split_escaped_whitespace`` function, support for escaped
+ whitespace in URI contexts.
+
+* tools/
+
- New front-end ``rst2html4.py``.
+
Release 0.13.1 (2016-12-09)
===========================
Modified: trunk/docutils/RELEASE-NOTES.txt
===================================================================
--- trunk/docutils/RELEASE-NOTES.txt 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/RELEASE-NOTES.txt 2016-12-16 03:45:07 UTC (rev 7998)
@@ -48,6 +48,12 @@
Changes Since 0.13.1
====================
+* docutils/parsers/rst/:
+
+ - Added functionality: escaped whitespace in URI contexts.
+
+* tools/
+
- New front-end ``rst2html4.py``.
Modified: trunk/docutils/docs/ref/rst/restructuredtext.txt
===================================================================
--- trunk/docutils/docs/ref/rst/restructuredtext.txt 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/docs/ref/rst/restructuredtext.txt 2016-12-16 03:45:07 UTC (rev 7998)
@@ -347,7 +347,8 @@
the markup. In reStructuredText we use the backslash, commonly used
as an escaping character in other domains.
-A backslash followed by any character (except whitespace characters)
+A backslash followed by any character (except whitespace characters
+in non-URI contexts)
escapes that character. The escaped character represents the
character itself, and is prevented from playing a role in any markup
interpretation. The backslash is removed from the output. A literal
@@ -355,9 +356,12 @@
backslash "escapes" the second, preventing it being interpreted in an
"escaping" role).
-Backslash-escaped whitespace characters are removed from the document.
+In non-URI contexts,
+backslash-escaped whitespace characters are removed from the document.
This allows for character-level `inline markup`_.
+In URIs, backslash-escaped whitespace represents a single space.
+
There are two contexts in which backslashes have no special meaning:
literal blocks and inline literals. In these contexts, a single
backslash represents a literal backslash, without having to double up.
@@ -1881,7 +1885,7 @@
explicit markup start and target name, or it may begin in an
indented text block immediately following, with no intervening
blank lines. If there are multiple lines in the link block, they
- are concatenated. Any whitespace is removed (whitespace is
+ are concatenated. Any unescaped whitespace is removed (whitespace is
permitted to allow for line wrapping). The following external
hyperlink targets are equivalent::
@@ -1894,6 +1898,10 @@
http://docutils.
sourceforge.net/rst.html
+ Escaped whitespace is preserved as intentional spaces, e.g.::
+
+ .. _reference: ../local\ path\ with\ spaces.html
+
If an external hyperlink target's URI contains an underscore as its
last character, it must be escaped to avoid being mistaken for an
indirect hyperlink target::
Modified: trunk/docutils/docutils/parsers/rst/states.py
===================================================================
--- trunk/docutils/docutils/parsers/rst/states.py 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/docutils/parsers/rst/states.py 2016-12-16 03:45:07 UTC (rev 7998)
@@ -117,6 +117,7 @@
from docutils.parsers.rst.languages import en as _fallback_language_module
from docutils.utils import escape2null, unescape, column_width
from docutils.utils import punctuation_chars, roman, urischemes
+from docutils.utils import split_escaped_whitespace
class MarkupError(DataError): pass
class UnknownInterpretedRoleError(DataError): pass
@@ -807,7 +808,9 @@
target.indirect_reference_name = aliastext[:-1]
else:
aliastype = 'uri'
- alias = ''.join(aliastext.split())
+ alias_parts = split_escaped_whitespace(match.group(2))
+ alias = ' '.join(''.join(unescape(part).split())
+ for part in alias_parts)
alias = self.adjust_uri(alias)
if alias.endswith(r'\_'):
alias = alias[:-2] + '_'
@@ -1958,8 +1961,10 @@
refname = self.is_reference(reference)
if refname:
return 'refname', refname
- reference = ''.join([''.join(line.split()) for line in block])
- return 'refuri', unescape(reference)
+ ref_parts = split_escaped_whitespace(' '.join(block))
+ reference = ' '.join(''.join(unescape(part).split())
+ for part in ref_parts)
+ return 'refuri', reference
def is_reference(self, reference):
match = self.explicit.patterns.reference.match(
Modified: trunk/docutils/docutils/utils/__init__.py
===================================================================
--- trunk/docutils/docutils/utils/__init__.py 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/docutils/utils/__init__.py 2016-12-16 03:45:07 UTC (rev 7998)
@@ -13,6 +13,7 @@
import os
import os.path
import re
+import itertools
import warnings
import unicodedata
from docutils import ApplicationError, DataError
@@ -575,7 +576,7 @@
parts.append('\x00' + text[found+1:found+2])
start = found + 2 # skip character after escape
-def unescape(text, restore_backslashes=False):
+def unescape(text, restore_backslashes=False, respect_whitespace=False):
"""
Return a string with nulls removed or restored to backslashes.
Backslash-escaped spaces are also removed.
@@ -587,6 +588,16 @@
text = ''.join(text.split(sep))
return text
+def split_escaped_whitespace(text):
+ """
+ Split `text` on escaped whitespace (null+space or null+newline).
+ Return a list of strings.
+ """
+ strings = text.split('\x00 ')
+ strings = [string.split('\x00\n') for string in strings]
+ # flatten list of lists of strings to list of strings:
+ return list(itertools.chain(*strings))
+
def strip_combining_chars(text):
if isinstance(text, str) and sys.version_info < (3,0):
return text
Modified: trunk/docutils/test/test_parsers/test_rst/test_inline_markup.py
===================================================================
--- trunk/docutils/test/test_parsers/test_rst/test_inline_markup.py 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/test/test_parsers/test_rst/test_inline_markup.py 2016-12-16 03:45:07 UTC (rev 7998)
@@ -981,6 +981,21 @@
<reference name="embedded URI with whitespace" refuri="http://example.com/long/path/and/whitespace">
embedded URI with whitespace
"""],
+[r"""
+`embedded URI with escaped whitespace <http://example.com/a\
+long/path\ and/some\ escaped\ whitespace>`__
+
+`<omitted\ reference\ text\ with\ escaped\ whitespace>`__
+""",
+"""\
+<document source="test data">
+ <paragraph>
+ <reference name="embedded URI with escaped whitespace" refuri="http://example.com/a long/path and/some escaped whitespace">
+ embedded URI with escaped whitespace
+ <paragraph>
+ <reference name="omitted reference text with escaped whitespace" refuri="omitted reference text with escaped whitespace">
+ omitted reference text with escaped whitespace
+"""],
["""\
`embedded email address <jd...@ex...>`__
@@ -1140,6 +1155,15 @@
<reference name="embedded alias with whitespace" refname="alias long phrase">
embedded alias with whitespace
"""],
+["""\
+`<embedded alias with whitespace_>`__
+""",
+"""\
+<document source="test data">
+ <paragraph>
+ <reference name="embedded alias with whitespace" refname="embedded alias with whitespace">
+ embedded alias with whitespace
+"""],
[r"""
`no embedded alias (whitespace inside bracket) < alias_ >`__
Modified: trunk/docutils/test/test_parsers/test_rst/test_targets.py
===================================================================
--- trunk/docutils/test/test_parsers/test_rst/test_targets.py 2016-12-13 08:27:53 UTC (rev 7997)
+++ trunk/docutils/test/test_parsers/test_rst/test_targets.py 2016-12-16 03:45:07 UTC (rev 7998)
@@ -36,7 +36,7 @@
<document source="test data">
<target ids="optional-space-before-colon" names="optional\ space\ before\ colon">
"""],
-["""\
+[r"""
External hyperlink targets:
.. _one-liner: http://structuredtext.sourceforge.net
@@ -49,7 +49,10 @@
http://structuredtext.
sourceforge.net
-.. _not-indirect: uri\\_
+.. _escaped-whitespace: http://example.org/a\ path\ with\
+ spaces.html
+
+.. _not-indirect: uri\_
""",
"""\
<document source="test data">
@@ -58,6 +61,7 @@
<target ids="one-liner" names="one-liner" refuri="http://structuredtext.sourceforge.net">
<target ids="starts-on-this-line" names="starts-on-this-line" refuri="http://structuredtext.sourceforge.net">
<target ids="entirely-below" names="entirely-below" refuri="http://structuredtext.sourceforge.net">
+ <target ids="escaped-whitespace" names="escaped-whitespace" refuri="http://example.org/a path with spaces.html">
<target ids="not-indirect" names="not-indirect" refuri="uri_">
"""],
["""\
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|