Revision: 804
http://assorted.svn.sourceforge.net/assorted/?rev=804&view=rev
Author: yangzhang
Date: 2008-05-13 15:42:19 -0700 (Tue, 13 May 2008)
Log Message:
-----------
added cp1252_to_unicode()
Modified Paths:
--------------
python-commons/trunk/src/commons/strs.py
Modified: python-commons/trunk/src/commons/strs.py
===================================================================
--- python-commons/trunk/src/commons/strs.py 2008-05-13 07:44:44 UTC (rev 803)
+++ python-commons/trunk/src/commons/strs.py 2008-05-13 22:42:19 UTC (rev 804)
@@ -18,3 +18,41 @@
return str( s )
else:
return s
+
+cp1252_to_unicode_translations = [ (u'\x80',u'\u20AC'), # (source, replacement) pairs; EURO SIGN (no entry for 0x81)
+ (u'\x82',u'\u201A'), # SINGLE LOW-9 QUOTATION MARK
+ (u'\x83',u'\u0192'), # LATIN SMALL LETTER F WITH HOOK
+ (u'\x84',u'\u201E'), # DOUBLE LOW-9 QUOTATION MARK
+ (u'\x85',u'\u2026'), # HORIZONTAL ELLIPSIS
+ (u'\x86',u'\u2020'), # DAGGER
+ (u'\x87',u'\u2021'), # DOUBLE DAGGER
+ (u'\x88',u'\u02C6'), # MODIFIER LETTER CIRCUMFLEX ACCENT
+ (u'\x89',u'\u2030'), # PER MILLE SIGN
+ (u'\x8A',u'\u0160'), # LATIN CAPITAL LETTER S WITH CARON
+ (u'\x8B',u'\u2039'), # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ (u'\x8C',u'\u0152'), # LATIN CAPITAL LIGATURE OE (no entry for 0x8D)
+ (u'\x8E',u'\u017D'), # LATIN CAPITAL LETTER Z WITH CARON (no entries for 0x8F, 0x90)
+ (u'\x91',u'\u2018'), # LEFT SINGLE QUOTATION MARK
+ (u'\x92',u'\u2019'), # RIGHT SINGLE QUOTATION MARK
+ (u'\x93',u'\u201C'), # LEFT DOUBLE QUOTATION MARK
+ (u'\x94',u'\u201D'), # RIGHT DOUBLE QUOTATION MARK
+ (u'\x95',u'\u2022'), # BULLET
+ (u'\x96',u'\u2013'), # EN DASH
+ (u'\x97',u'\u2014'), # EM DASH
+ (u'\x98',u'\u02DC'), # SMALL TILDE
+ (u'\x99',u'\u2122'), # TRADE MARK SIGN
+ (u'\x9A',u'\u0161'), # LATIN SMALL LETTER S WITH CARON
+ (u'\x9B',u'\u203A'), # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ (u'\x9C',u'\u0153'), # LATIN SMALL LIGATURE OE (no entry for 0x9D)
+ (u'\x9E',u'\u017E'), # LATIN SMALL LETTER Z WITH CARON
+ (u'\x9F',u'\u0178') ] # LATIN CAPITAL LETTER Y WITH DIAERESIS
+
+def cp1252_to_unicode(x): # x: presumably a unicode string -- confirm with callers
+ """Return x with every character listed in cp1252_to_unicode_translations
+ (code points in 0x80-0x9f) replaced by its proper Unicode equivalent; the
+ unlisted code points 0x81, 0x8d, 0x8f, 0x90 and 0x9d pass through unchanged.
+ Table based on U{http://www.intertwingly.net/stories/2004/04/14/i18n.html}."""
+ for a,b in cp1252_to_unicode_translations: # a: character to find, b: its replacement
+ x = x.replace(a,b) # one replace() call over the whole string per table entry
+ return x
+
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|