|
From: <mi...@us...> - 2025-09-22 21:00:16
|
Revision: 10251
http://sourceforge.net/p/docutils/code/10251
Author: milde
Date: 2025-09-22 21:00:13 +0000 (Mon, 22 Sep 2025)
Log Message:
-----------
rST parser: allow for combining characters in grid tables.
Ignore combining characters when extracting a grid table block and
when parsing the grid table structure.
Allow for combining characters when extracting a 2D block with cell content.
Missing part of the fixes in [r7231].
Fixes [bugs:#128] and [bugs:#512].
Modified Paths:
--------------
trunk/docutils/HISTORY.rst
trunk/docutils/RELEASE-NOTES.rst
trunk/docutils/docutils/parsers/rst/states.py
trunk/docutils/docutils/parsers/rst/tableparser.py
trunk/docutils/docutils/statemachine.py
trunk/docutils/test/test_parsers/test_rst/test_TableParser.py
Modified: trunk/docutils/HISTORY.rst
===================================================================
--- trunk/docutils/HISTORY.rst 2025-09-22 20:59:57 UTC (rev 10250)
+++ trunk/docutils/HISTORY.rst 2025-09-22 21:00:13 UTC (rev 10251)
@@ -17,9 +17,19 @@
Release 0.22.3b1.dev (unpublished)
==================================
-.
+* docutils/parsers/rst/states.py
+ - Ignore combining characters when extracting a grid table block.
+* docutils/parsers/rst/tableparser.py
+
+ - Ignore combining characters when parsing the grid table structure.
+
+* docutils/statemachine.py
+
+ - Fix handling of combining characters when extracting 2d-block.
+
+
Release 0.22.2 (2025-09-20)
===========================
Modified: trunk/docutils/RELEASE-NOTES.rst
===================================================================
--- trunk/docutils/RELEASE-NOTES.rst 2025-09-22 20:59:57 UTC (rev 10250)
+++ trunk/docutils/RELEASE-NOTES.rst 2025-09-22 21:00:13 UTC (rev 10251)
@@ -266,7 +266,9 @@
Release 0.22.3b1.dev (unpublished)
==================================
-.
+Rst parser:
+ Allow for combining characters in grid tables.
+ Fixes bugs #128 and #512.
Release 0.22.2 (2025-09-20)
@@ -278,19 +280,11 @@
Release 0.22.1 (2025-09-17)
===========================
-* docutils/parsers/rst/states.py
-
+Rst parser:
- Relax "section title" system messages from SEVERE to ERROR.
- - Fix behaviour with nested parsing into a detached node
- (cf. bugs #508 and #509).
- - New attribute `NestedStateMachine.parent_state_machine`.
- Use case: update the "current node" of parent state machine(s)
- after nested parsing.
- - Better error messages for grid table markup errors (bug #504),
- based on patch #214 by Jynn Nelson.
+ - New attribute `parsers.rst.states.NestedStateMachine.parent_state_machine`.
-* docutils/writers/latex2e/__init__.py
-
+LaTeX writer:
- Add cross-reference anchors (``\phantomsection\label{...}``)
for elements with IDs (fixes bug #503).
- Fix cross-reference anchor placement in figures, images,
Modified: trunk/docutils/docutils/parsers/rst/states.py
===================================================================
--- trunk/docutils/docutils/parsers/rst/states.py 2025-09-22 20:59:57 UTC (rev 10250)
+++ trunk/docutils/docutils/parsers/rst/states.py 2025-09-22 21:00:13 UTC (rev 10251)
@@ -116,7 +116,7 @@
from docutils.nodes import unescape, whitespace_normalize_name
import docutils.parsers.rst
from docutils.parsers.rst import directives, languages, tableparser, roles
-from docutils.utils import escape2null, column_width
+from docutils.utils import escape2null, column_width, strip_combining_chars
from docutils.utils import punctuation_chars, urischemes
from docutils.utils import split_escaped_whitespace
from docutils.utils._roman_numerals import (InvalidRomanNumeralError,
@@ -1848,7 +1848,8 @@
messages.extend(self.malformed_table(block, detail, i))
return [], messages, blank_finish
for i in range(len(block)): # check right edge
- if len(block[i]) != width or block[i][-1] not in '+|':
+ if len(strip_combining_chars(block[i])
+ ) != width or block[i][-1] not in '+|':
detail = 'Right border not aligned or missing.'
messages.extend(self.malformed_table(block, detail, i))
return [], messages, blank_finish
Modified: trunk/docutils/docutils/parsers/rst/tableparser.py
===================================================================
--- trunk/docutils/docutils/parsers/rst/tableparser.py 2025-09-22 20:59:57 UTC (rev 10250)
+++ trunk/docutils/docutils/parsers/rst/tableparser.py 2025-09-22 21:00:13 UTC (rev 10251)
@@ -167,6 +167,9 @@
We'll end up knowing all the row and column boundaries, cell positions
and their dimensions.
"""
+ # a copy of the block without combining characters:
+ self.stripped_block = [strip_combining_chars(line)
+ for line in self.block]
corners = [(0, 0)]
while corners:
top, left = corners.pop(0)
@@ -209,7 +212,7 @@
def scan_cell(self, top, left):
"""Starting at the top-left corner, start tracing out a cell."""
- assert self.block[top][left] == '+'
+ assert self.stripped_block[top][left] == '+'
return self.scan_right(top, left)
def scan_right(self, top, left):
@@ -218,7 +221,7 @@
boundaries ('+').
"""
colseps = {}
- line = self.block[top]
+ line = self.stripped_block[top]
for i in range(left + 1, self.right + 1):
if line[i] == '+':
colseps[i] = [top]
@@ -238,7 +241,7 @@
"""
rowseps = {}
for i in range(top + 1, self.bottom + 1):
- if self.block[i][right] == '+':
+ if self.stripped_block[i][right] == '+':
rowseps[i] = [right]
result = self.scan_left(top, left, i, right)
if result:
@@ -245,7 +248,7 @@
newrowseps, colseps = result
update_dict_of_lists(rowseps, newrowseps)
return i, rowseps, colseps
- elif self.block[i][right] != '|':
+ elif self.stripped_block[i][right] != '|':
return None
return None
@@ -255,7 +258,7 @@
It must line up with the starting point.
"""
colseps = {}
- line = self.block[bottom]
+ line = self.stripped_block[bottom]
for i in range(right - 1, left, -1):
if line[i] == '+':
colseps[i] = [bottom]
@@ -275,9 +278,9 @@
"""
rowseps = {}
for i in range(bottom - 1, top, -1):
- if self.block[i][left] == '+':
+ if self.stripped_block[i][left] == '+':
rowseps[i] = [left]
- elif self.block[i][left] != '|':
+ elif self.stripped_block[i][left] != '|':
return None
return rowseps
Modified: trunk/docutils/docutils/statemachine.py
===================================================================
--- trunk/docutils/docutils/statemachine.py 2025-09-22 20:59:57 UTC (rev 10250)
+++ trunk/docutils/docutils/statemachine.py 2025-09-22 21:00:13 UTC (rev 10251)
@@ -1426,18 +1426,18 @@
def get_2D_block(self, top, left, bottom, right, strip_indent=True):
block = self[top:bottom]
indent = right
- for i in range(len(block.data)):
- # get slice from line, care for combining characters
- ci = utils.column_indices(block.data[i])
+ for i, line in enumerate(block.data):
+ # trim line to block borders, allow for combining characters
+ adjusted_indices = utils.column_indices(line)
try:
- left = ci[left]
+ left_i = adjusted_indices[left]
except IndexError:
- left += len(block.data[i]) - len(ci)
+ left_i = left
try:
- right = ci[right]
+ right_i = adjusted_indices[right]
except IndexError:
- right += len(block.data[i]) - len(ci)
- block.data[i] = line = block.data[i][left:right].rstrip()
+ right_i = len(line)
+ block.data[i] = line = line[left_i:right_i].rstrip()
if line:
indent = min(indent, len(line) - len(line.lstrip()))
if strip_indent and 0 < indent < right:
Modified: trunk/docutils/test/test_parsers/test_rst/test_TableParser.py
===================================================================
--- trunk/docutils/test/test_parsers/test_rst/test_TableParser.py 2025-09-22 20:59:57 UTC (rev 10250)
+++ trunk/docutils/test/test_parsers/test_rst/test_TableParser.py 2025-09-22 21:00:13 UTC (rev 10251)
@@ -73,32 +73,32 @@
[],
[[(0, 0, 1, ['A table with']),
(0, 0, 1, ['two columns.'])]])],
-# Combining chars in grid tables still fail
-# ["""\
-# +--------------+------------------+
-# | A tāble w̅ith | comb̲ining chars. |
-# +--------------+------------------+
-# """,
-# [(0, 0, 2, 15, ['A table with']),
-# (0, 15, 2, 30, ['combining chars.'])],
-# ([14, 14],
-# [],
-# [[(0, 0, 1, ['A table with']),
-# (0, 0, 1, ['combining chars.'])]])],
+# Combining chars in table cells
["""\
++--------------+------------------+
+| A tāble w̅ith | comb̲ining chars. |
++--------------+------------------+
+""",
+[(0, 0, 2, 15, ['A tāble w̅ith']),
+ (0, 15, 2, 34, ['comb̲ining chars.'])],
+([14, 18],
+ [],
+ [[(0, 0, 1, ['A tāble w̅ith']),
+ (0, 0, 1, ['comb̲ining chars.'])]])],
+["""\
+--------------+-------------+
-| A table with | two columns |
+| A tāble w̅ith | two columns |
+--------------+-------------+
| and | two rows. |
+--------------+-------------+
""",
-[(0, 0, 2, 15, ['A table with']),
+[(0, 0, 2, 15, ['A tāble w̅ith']),
(0, 15, 2, 29, ['two columns']),
(2, 0, 4, 15, ['and']),
(2, 15, 4, 29, ['two rows.'])],
([14, 13],
[],
- [[(0, 0, 1, ['A table with']),
+ [[(0, 0, 1, ['A tāble w̅ith']),
(0, 0, 1, ['two columns'])],
[(0, 0, 3, ['and']),
(0, 0, 3, ['two rows.'])]])],
@@ -126,18 +126,18 @@
None]])],
["""\
+------------+-------------+---------------+
-| A table | two rows in | and row spans |
-| with three +-------------+ to left and |
+| A tāble | two rows in | and row spans |
+| with t̲h̲r̲e̲e̲ +-------------+ to left and |
| columns, | the middle, | right. |
+------------+-------------+---------------+
""",
-[(0, 0, 4, 13, ['A table', 'with three', 'columns,']),
+[(0, 0, 4, 13, ['A tāble', 'with t̲h̲r̲e̲e̲', 'columns,']),
(0, 13, 2, 27, ['two rows in']),
(0, 27, 4, 43, ['and row spans', 'to left and', 'right.']),
(2, 13, 4, 27, ['the middle,'])],
([12, 13, 15],
[],
- [[(1, 0, 1, ['A table', 'with three', 'columns,']),
+ [[(1, 0, 1, ['A tāble', 'with t̲h̲r̲e̲e̲', 'columns,']),
(0, 0, 1, ['two rows in']),
(1, 0, 1, ['and row spans', 'to left and', 'right.'])],
[None,
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|