|
From: <sub...@co...> - 2007-05-25 01:52:13
|
Author: ianb
Date: 2007-05-24 19:52:10 -0600 (Thu, 24 May 2007)
New Revision: 2700
Modified:
FormEncode/trunk/docs/news.txt
FormEncode/trunk/formencode/htmlfill.py
FormEncode/trunk/tests/test_htmlfill.py
Log:
Fix problem with entity def unescaping in attribute values
Modified: FormEncode/trunk/docs/news.txt
===================================================================
--- FormEncode/trunk/docs/news.txt 2007-05-25 01:32:05 UTC (rev 2699)
+++ FormEncode/trunk/docs/news.txt 2007-05-25 01:52:10 UTC (rev 2700)
@@ -20,6 +20,9 @@
* Fixed bug in htmlfill when a document ends with no trailing text
after the last tag.
+* Fix problem with HTMLParser's default unescaping routine, which only
+ understood a very limited number of entities in attribute values.
+
0.7.1
-----
Modified: FormEncode/trunk/formencode/htmlfill.py
===================================================================
--- FormEncode/trunk/formencode/htmlfill.py 2007-05-25 01:32:05 UTC (rev 2699)
+++ FormEncode/trunk/formencode/htmlfill.py 2007-05-25 01:52:10 UTC (rev 2700)
@@ -6,6 +6,7 @@
import HTMLParser
import cgi
import re
+from htmlentitydefs import name2codepoint
__all__ = ['render', 'htmlliteral', 'default_formatter',
'none_formatter', 'escape_formatter',
@@ -263,6 +264,30 @@
def add_key(self, key):
self.used_keys[key] = 1
+    # Both patterns capture the reference body as group 1: the entity name,
+    # or the digits of a numeric reference (with an x/X prefix when hex).
+    _entityref_re = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*);')
+    _charref_re = re.compile('&#([0-9]+|[xX][0-9a-fA-F]+);')
+
+    def unescape(self, s):
+        """Decode named and numeric character references found in ``s``."""
+        # NOTE(review): the two passes run back to back, so text such as
+        # '&amp;#65;' is decoded twice (ending up as 'A'); confirm whether
+        # that matters for attribute values.
+        s = self._entityref_re.sub(self._sub_entityref, s)
+        s = self._charref_re.sub(self._sub_charref, s)
+        return s
+
+    def _sub_entityref(self, match):
+        """Return the character for a named reference such as ``&rarr;``."""
+        name = match.group(1)
+        if name not in name2codepoint:
+            # If we don't recognize it, pass it through as though it
+            # wasn't an entity ref at all
+            return match.group(0)
+        return unichr(name2codepoint[name])
+
+    def _sub_charref(self, match):
+        """Return the character for a numeric reference (decimal or hex)."""
+        num = match.group(1)
+        # Hex references look like '&#x2192;': the marker is a bare 'x'/'X'
+        # (never '0x'), and it has to be stripped before int() parses base 16.
+        if num[:1] in ('x', 'X'):
+            return unichr(int(num[1:], 16))
+        return unichr(int(num))
+
def handle_starttag(self, tag, attrs, startend=False):
self.write_pos()
if tag == 'input':
@@ -343,6 +368,7 @@
self.used_errors[name] = 1
def handle_input(self, attrs, startend):
+ print 'attrs', attrs
t = (self.get_attr(attrs, 'type') or 'text').lower()
name = self.get_attr(attrs, 'name')
self.write_marker(name)
@@ -402,6 +428,7 @@
self.skip_next = True
self.add_key(name)
elif t == 'submit' or t == 'reset' or t == 'button':
+ print 'set_attr', repr(value or self.get_attr(attrs, 'value', ''))
self.set_attr(attrs, 'value', value or
self.get_attr(attrs, 'value', ''))
self.write_tag('input', attrs, startend)
Modified: FormEncode/trunk/tests/test_htmlfill.py
===================================================================
--- FormEncode/trunk/tests/test_htmlfill.py 2007-05-25 01:32:05 UTC (rev 2699)
+++ FormEncode/trunk/tests/test_htmlfill.py 2007-05-25 01:52:10 UTC (rev 2700)
@@ -1,6 +1,7 @@
import sys
import os
import re
+from htmlentitydefs import name2codepoint
base_dir = os.path.dirname(os.path.dirname(os.path.dirname(
os.path.abspath(__file__))))
@@ -72,4 +73,12 @@
checker(p, listener.schema())
def test_no_trailing_newline():
- assert htmlfill.render('<html><body></body></html>',{},{}) == '<html><body></body></html>'
+ assert (htmlfill.render('<html><body></body></html>', {}, {})
+ == '<html><body></body></html>')
+
+def test_escape_defaults():
+    # Entity references in attribute values: &rarr; is expected to come back
+    # as the arrow character, while &gt; and &amp; come back as escaped text.
+    rarr = unichr(name2codepoint['rarr'])
+    assert (htmlfill.render('<input type="submit" value="next&gt;&rarr;">', {}, {})
+            == '<input type="submit" value="next&gt;%s">' % rarr)
+    assert (htmlfill.render('<input type="submit" value="1&amp;2">', {}, {})
+            == '<input type="submit" value="1&amp;2">')
|