From: <mi...@us...> - 2025-09-22 21:00:16
|
Revision: 10251 http://sourceforge.net/p/docutils/code/10251 Author: milde Date: 2025-09-22 21:00:13 +0000 (Mon, 22 Sep 2025) Log Message: ----------- rST parser: allow for combining characters in grid tables. Ignore combining characters when extracting a grid table block and when parsing the grid table structure. Allow for combining characters when extracting 2d-block with cell content. Missing part of the fixes in [r7231]. Fixes [bugs:#128] and [bugs:#512]. Modified Paths: -------------- trunk/docutils/HISTORY.rst trunk/docutils/RELEASE-NOTES.rst trunk/docutils/docutils/parsers/rst/states.py trunk/docutils/docutils/parsers/rst/tableparser.py trunk/docutils/docutils/statemachine.py trunk/docutils/test/test_parsers/test_rst/test_TableParser.py Modified: trunk/docutils/HISTORY.rst =================================================================== --- trunk/docutils/HISTORY.rst 2025-09-22 20:59:57 UTC (rev 10250) +++ trunk/docutils/HISTORY.rst 2025-09-22 21:00:13 UTC (rev 10251) @@ -17,9 +17,19 @@ Release 0.22.3b1.dev (unpublished) ================================== -. +* docutils/parsers/rst/states.py + - Ignore combining characters when extracting a grid table block +* docutils/parsers/rst/tableparser.py + + - Ignore combining characters when parsing the grid table structure. + +* docutils/statemachine.py + + - Fix handling of combining characters when extracting 2d-block. + + Release 0.22.2 (2025-09-20) =========================== Modified: trunk/docutils/RELEASE-NOTES.rst =================================================================== --- trunk/docutils/RELEASE-NOTES.rst 2025-09-22 20:59:57 UTC (rev 10250) +++ trunk/docutils/RELEASE-NOTES.rst 2025-09-22 21:00:13 UTC (rev 10251) @@ -266,7 +266,9 @@ Release 0.22.3b1.dev (unpublished) ================================== -. +Rst parser: + Allow for combining characters in grid tables. + Fixes bugs #128 and #512. Release 0.22.2 (2025-09-20) @@ -278,19 +280,11 @@ Release 0.22.1 (2025-09-17) =========================== -* docutils/parsers/rst/states.py - +Rst parser: - Relax "section title" system messages from SEVERE to ERROR. - - Fix behaviour with nested parsing into a detached node - (cf. bugs #508 and #509). - - New attribute `NestedStateMachine.parent_state_machine`. - Use case: update the "current node" of parent state machine(s) - after nested parsing. - - Better error messages for grid table markup errors (bug #504), - based on patch #214 by Jynn Nelson. + - New attribute `parsers.rst.states.NestedStateMachine.parent_state_machine`. -* docutils/writers/latex2e/__init__.py - +LaTeX writer: - Add cross-reference anchors (``\phantomsection\label{...}``) for elements with IDs (fixes bug #503). - Fix cross-reference anchor placement in figures, images, Modified: trunk/docutils/docutils/parsers/rst/states.py =================================================================== --- trunk/docutils/docutils/parsers/rst/states.py 2025-09-22 20:59:57 UTC (rev 10250) +++ trunk/docutils/docutils/parsers/rst/states.py 2025-09-22 21:00:13 UTC (rev 10251) @@ -116,7 +116,7 @@ from docutils.nodes import unescape, whitespace_normalize_name import docutils.parsers.rst from docutils.parsers.rst import directives, languages, tableparser, roles -from docutils.utils import escape2null, column_width +from docutils.utils import escape2null, column_width, strip_combining_chars from docutils.utils import punctuation_chars, urischemes from docutils.utils import split_escaped_whitespace from docutils.utils._roman_numerals import (InvalidRomanNumeralError, @@ -1848,7 +1848,8 @@ messages.extend(self.malformed_table(block, detail, i)) return [], messages, blank_finish for i in range(len(block)): # check right edge - if len(block[i]) != width or block[i][-1] not in '+|': + if len(strip_combining_chars(block[i]) + ) != width or block[i][-1] not in '+|': detail = 'Right border not aligned or missing.' messages.extend(self.malformed_table(block, detail, i)) return [], messages, blank_finish Modified: trunk/docutils/docutils/parsers/rst/tableparser.py =================================================================== --- trunk/docutils/docutils/parsers/rst/tableparser.py 2025-09-22 20:59:57 UTC (rev 10250) +++ trunk/docutils/docutils/parsers/rst/tableparser.py 2025-09-22 21:00:13 UTC (rev 10251) @@ -167,6 +167,9 @@ We'll end up knowing all the row and column boundaries, cell positions and their dimensions. """ + # a copy of the block without combining characters: + self.stripped_block = [strip_combining_chars(line) + for line in self.block] corners = [(0, 0)] while corners: top, left = corners.pop(0) @@ -209,7 +212,7 @@ def scan_cell(self, top, left): """Starting at the top-left corner, start tracing out a cell.""" - assert self.block[top][left] == '+' + assert self.stripped_block[top][left] == '+' return self.scan_right(top, left) def scan_right(self, top, left): @@ -218,7 +221,7 @@ boundaries ('+'). """ colseps = {} - line = self.block[top] + line = self.stripped_block[top] for i in range(left + 1, self.right + 1): if line[i] == '+': colseps[i] = [top] @@ -238,7 +241,7 @@ """ rowseps = {} for i in range(top + 1, self.bottom + 1): - if self.block[i][right] == '+': + if self.stripped_block[i][right] == '+': rowseps[i] = [right] result = self.scan_left(top, left, i, right) if result: @@ -245,7 +248,7 @@ newrowseps, colseps = result update_dict_of_lists(rowseps, newrowseps) return i, rowseps, colseps - elif self.block[i][right] != '|': + elif self.stripped_block[i][right] != '|': return None return None @@ -255,7 +258,7 @@ It must line up with the starting point. """ colseps = {} - line = self.block[bottom] + line = self.stripped_block[bottom] for i in range(right - 1, left, -1): if line[i] == '+': colseps[i] = [bottom] @@ -275,9 +278,9 @@ """ rowseps = {} for i in range(bottom - 1, top, -1): - if self.block[i][left] == '+': + if self.stripped_block[i][left] == '+': rowseps[i] = [left] - elif self.block[i][left] != '|': + elif self.stripped_block[i][left] != '|': return None return rowseps Modified: trunk/docutils/docutils/statemachine.py =================================================================== --- trunk/docutils/docutils/statemachine.py 2025-09-22 20:59:57 UTC (rev 10250) +++ trunk/docutils/docutils/statemachine.py 2025-09-22 21:00:13 UTC (rev 10251) @@ -1426,18 +1426,18 @@ def get_2D_block(self, top, left, bottom, right, strip_indent=True): block = self[top:bottom] indent = right - for i in range(len(block.data)): - # get slice from line, care for combining characters - ci = utils.column_indices(block.data[i]) + for i, line in enumerate(block.data): + # trim line to block borders, allow for for combining characters + adjusted_indices = utils.column_indices(line) try: - left = ci[left] + left_i = adjusted_indices[left] except IndexError: - left += len(block.data[i]) - len(ci) + left_i = left try: - right = ci[right] + right_i = adjusted_indices[right] except IndexError: - right += len(block.data[i]) - len(ci) - block.data[i] = line = block.data[i][left:right].rstrip() + right_i = len(line) + block.data[i] = line = line[left_i:right_i].rstrip() if line: indent = min(indent, len(line) - len(line.lstrip())) if strip_indent and 0 < indent < right: Modified: trunk/docutils/test/test_parsers/test_rst/test_TableParser.py =================================================================== --- trunk/docutils/test/test_parsers/test_rst/test_TableParser.py 2025-09-22 20:59:57 UTC (rev 10250) +++ trunk/docutils/test/test_parsers/test_rst/test_TableParser.py 2025-09-22 21:00:13 UTC (rev 10251) @@ -73,32 +73,32 @@ [], [[(0, 0, 1, ['A table with']), (0, 0, 1, ['two columns.'])]])], -# Combining chars in grid tables still fail -# ["""\ -# +--------------+------------------+ -# | A tāble w̅ith | comb̲ining chars. | -# +--------------+------------------+ -# """, -# [(0, 0, 2, 15, ['A table with']), -# (0, 15, 2, 30, ['combining chars.'])], -# ([14, 14], -# [], -# [[(0, 0, 1, ['A table with']), -# (0, 0, 1, ['combining chars.'])]])], +# Combining chars in table cells ["""\ ++--------------+------------------+ +| A tāble w̅ith | comb̲ining chars. | ++--------------+------------------+ +""", +[(0, 0, 2, 15, ['A tāble w̅ith']), + (0, 15, 2, 34, ['comb̲ining chars.'])], +([14, 18], + [], + [[(0, 0, 1, ['A tāble w̅ith']), + (0, 0, 1, ['comb̲ining chars.'])]])], +["""\ +--------------+-------------+ -| A table with | two columns | +| A tāble w̅ith | two columns | +--------------+-------------+ | and | two rows. | +--------------+-------------+ """, -[(0, 0, 2, 15, ['A table with']), +[(0, 0, 2, 15, ['A tāble w̅ith']), (0, 15, 2, 29, ['two columns']), (2, 0, 4, 15, ['and']), (2, 15, 4, 29, ['two rows.'])], ([14, 13], [], - [[(0, 0, 1, ['A table with']), + [[(0, 0, 1, ['A tāble w̅ith']), (0, 0, 1, ['two columns'])], [(0, 0, 3, ['and']), (0, 0, 3, ['two rows.'])]])], @@ -126,18 +126,18 @@ None]])], ["""\ +------------+-------------+---------------+ -| A table | two rows in | and row spans | -| with three +-------------+ to left and | +| A tāble | two rows in | and row spans | +| with t̲h̲r̲e̲e̲ +-------------+ to left and | | columns, | the middle, | right. | +------------+-------------+---------------+ """, -[(0, 0, 4, 13, ['A table', 'with three', 'columns,']), +[(0, 0, 4, 13, ['A tāble', 'with t̲h̲r̲e̲e̲', 'columns,']), (0, 13, 2, 27, ['two rows in']), (0, 27, 4, 43, ['and row spans', 'to left and', 'right.']), (2, 13, 4, 27, ['the middle,'])], ([12, 13, 15], [], - [[(1, 0, 1, ['A table', 'with three', 'columns,']), + [[(1, 0, 1, ['A tāble', 'with t̲h̲r̲e̲e̲', 'columns,']), (0, 0, 1, ['two rows in']), (1, 0, 1, ['and row spans', 'to left and', 'right.'])], [None, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |