Author: chrisz
Date: Wed Jun 7 18:38:29 2006
New Revision: 5351
Modified:
Webware/trunk/WebUtils/Docs/RelNotes-X.Y.phtml
Webware/trunk/WebUtils/Funcs.py
Webware/trunk/WebUtils/Tests/TestFuncs.py
Log:
Let urlDecode() silently ignore erroneous encoding.
After this change, urlEncode() and urlDecode() now behave
exactly like quote_plus() and unquote_plus() in urllib.
For Python versions < 2.4, urlEncode() will be faster,
and urlDecode() is a little bit slower, but more exact.
For Python versions >= 2.4, the urllib functions
are twice as fast and behave the same.
Modified: Webware/trunk/WebUtils/Docs/RelNotes-X.Y.phtml
==============================================================================
--- Webware/trunk/WebUtils/Docs/RelNotes-X.Y.phtml (original)
+++ Webware/trunk/WebUtils/Docs/RelNotes-X.Y.phtml Wed Jun 7 18:38:29 2006
@@ -19,7 +19,14 @@
<a name="Improvements"></a><h2>Improvements and Refinements</h2>
<ul>
- <li>...</li>
+ <li>Made urlDecode() behave like urllib.unquote_plus():
+ All illegal codes will now be silently ignored (suggested
+ by Ben Parker). As implemented in Python 2.4, codes with only
+ one digit are now considered illegal.</li>
+ <li>Instead of urlEncode() and urlDecode(), you should use
+ urllib.quote_plus() and urllib.unquote_plus() from Python 2.4 on,
+ since the urllib implementation is now faster and the behavior is
+ now the same. Added a note to the docstrings of these functions.</li>
</ul>
<a name="Security"></a><h2>Security</h2>
Modified: Webware/trunk/WebUtils/Funcs.py
==============================================================================
--- Webware/trunk/WebUtils/Funcs.py (original)
+++ Webware/trunk/WebUtils/Funcs.py Wed Jun 7 18:38:29 2006
@@ -68,20 +68,36 @@
_urlEncode[' '] = '+'
def urlEncode(s):
- """Return the encoded version of the given string, safe for using as a URL."""
+ """Return the encoded version of the given string.
+
+ The resulting string is safe for using as a URL.
+
+ With Python 2.4 and newer, you should use
+ urllib.quote_plus() instead of urlEncode().
+
+ """
return ''.join(map(lambda c: _urlEncode[c], s))
def urlDecode(s):
"""Return the decoded version of the given string.
- Note that invalid URLs will throw exceptons.
- For example, a URL whose % coding is incorrect.
+ Note that invalid URLs will not throw exceptions.
+ For example, incorrect % codings will be ignored.
+
+ With Python 2.4 and newer, you should use
+ urllib.unquote_plus() instead of urlDecode().
"""
p1 = s.replace('+', ' ').split('%')
p2 = [p1.pop(0)]
for p in p1:
- p2.append(chr(int(p[:2], 16)) + p[2:])
+ if len(p[:2].strip()) == 2:
+ try:
+ p2.append(chr(int(p[:2], 16)) + p[2:])
+ except ValueError:
+ p2.append('%' + p)
+ else:
+ p2.append('%' + p)
return ''.join(p2)
def htmlForDict(dict, addSpace=None, filterValueCallBack=None, maxValueLength=None):
@@ -126,7 +142,7 @@
def normURL(path):
"""Normalizes a URL path, like os.path.normpath.
- Acts on a URL independant of operating system environment.
+ Acts on a URL independant of operating system environment.
"""
if not path:
Modified: Webware/trunk/WebUtils/Tests/TestFuncs.py
==============================================================================
--- Webware/trunk/WebUtils/Tests/TestFuncs.py (original)
+++ Webware/trunk/WebUtils/Tests/TestFuncs.py Wed Jun 7 18:38:29 2006
@@ -24,7 +24,17 @@
allChars
]
+URLDecodeTests = [
+ '%3E and %A7',
+ '%3e and %a7',
+ '& and + and -',
+ 'illegal %3g?',
+ 'illegal %x1?',
+ '1 % 2 %% 3 %%%4 %%20'
+]
+
def TestURLEncode():
+ print 'Test URLEncode'
for test in URLEncodeTests:
if urlEncode(test) == urllib.quote_plus(test, '/'):
print ' Passed test'
@@ -35,6 +45,18 @@
print ' quote_plus = (%s)' % urllib.quote_plus(test, '/')
print
+def TestURLDecode():
+ print 'Test URLDecode'
+ for test in URLDecodeTests:
+ if urlDecode(test) == urllib.unquote_plus(test):
+ print ' Passed test'
+ else:
+ print ' Failed test!'
+ print ' string = (%s)' % test
+ print ' urlDecode = (%s)' % urlDecode(test)
+ print ' unquote_plus = (%s)' % urllib.unquote_plus(test)
+ print
+
def TestEncodeAndDecode(encodeFunc, decodeFunc, tests):
print 'Test %s and %s' % (encodeFunc.__name__, decodeFunc.__name__)
for test in tests:
@@ -61,8 +83,9 @@
def BenchmarkURLEncode():
print 'Benchmark urlEncode() vs. quote_plus()'
- t1 = Benchmark(urllib.quote_plus, URLEncodeTests)
- t2 = Benchmark(urlEncode, URLEncodeTests)
+ tests = URLEncodeTests + map(urlEncode, URLEncodeTests)
+ t1 = Benchmark(urllib.quote_plus, tests)
+ t2 = Benchmark(urlEncode, tests)
print ' quote_plus() = %6.2f secs' % t1
print ' urlEncode() = %6.2f secs' % t2
print ' diff = %6.2f secs' % (t2 - t1)
@@ -70,12 +93,11 @@
print ' factor = %6.2f X' % (t1/t2)
print
-URLDecodeTests = map(urlEncode, URLEncodeTests)
-
def BenchmarkURLDecode():
print 'Benchmark urlDecode() vs. unquote_plus()'
- t1 = Benchmark(urllib.unquote_plus, URLDecodeTests)
- t2 = Benchmark(urlDecode, URLDecodeTests)
+ tests = map(urlEncode, URLEncodeTests) + URLDecodeTests
+ t1 = Benchmark(urllib.unquote_plus, tests)
+ t2 = Benchmark(urlDecode, tests)
print ' unquote_plus() = %6.2f secs' % t1
print ' urlDecode() = %6.2f secs' % t2
print ' diff = %6.2f secs' % (t2 - t1)
@@ -103,10 +125,10 @@
# and with newer Python versions, we don't look so good any more).
if 0:
del URLEncodeTests[-1]
- del URLDecodeTests[-1]
# run tests
TestURLEncode()
+ TestURLDecode()
TestURLEncodeAndDecode()
BenchmarkURLEncode()
BenchmarkURLDecode()
|