|
From: <sto...@us...> - 2007-06-15 12:00:23
|
Revision: 36
http://svn.sourceforge.net/fb2-perl-tools/?rev=36&view=rev
Author: storchaka
Date: 2007-06-15 05:00:25 -0700 (Fri, 15 Jun 2007)
Log Message:
-----------
Normalize empty elements after removing spaces.
Modified Paths:
--------------
trunk/fb2-python-tools/fb2format.py
Modified: trunk/fb2-python-tools/fb2format.py
===================================================================
--- trunk/fb2-python-tools/fb2format.py 2007-06-14 22:36:14 UTC (rev 35)
+++ trunk/fb2-python-tools/fb2format.py 2007-06-15 12:00:25 UTC (rev 36)
@@ -30,6 +30,7 @@
import sys, getopt, os, os.path, xml.dom.minidom, codecs, cStringIO
_spaces_re = re.compile( r'[ \t\r\n]{2,}|[\t\r\n]' )
+_empty_element_re = re.compile( r'<([^ >]+)([^>]*)(?<!/)></\1>' )
def _make_tags_switch( tags ):
return re.compile( '(%s)' % '|'.join( '<%s(?: [^>]*)?>.*?</%s>' % (tag, tag) for tag in tags ), re.DOTALL )
_text_re = _make_tags_switch( ('p', 'v', 'subtitle', 'text-author') )
@@ -49,13 +50,13 @@
if _text_re.match( s ):
return s
else:
- return s.strip( ' ' ).replace( '> ', '>' ).replace( ' <', '<' )
+ return _empty_element_re.sub( r'<\1\2/>', s.strip( ' ' ).replace( '> ', '>' ).replace( ' <', '<' ) )
def _format_tag( s ):
if _text_re.match( s ):
return s
else:
- return s.strip( ' ' ).replace( '> ', '>' ).replace( ' <', '<' ).replace( '><', '>\n<' )
+ return _empty_element_re.sub( r'<\1\2/>', s.strip( ' ' ).replace( '> ', '>' ).replace( ' <', '<' ) ).replace( '><', '>\n<' )
def fb2format( data, squeeze = False, squeezeBinary = False ):
data = _spaces_re.sub( ' ', data )
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|