[Translate-cvs] src/translate/storage po.py,1.44,1.45

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Update of /cvsroot/translate/src/translate/storage
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5932

Modified Files:
	po.py 
Log Message:
Added code to encode/decode PO files using the encoding specified in their header, rather than hard-coding UTF-8.


Index: po.py
===================================================================
RCS file: /cvsroot/translate/src/translate/storage/po.py,v
retrieving revision 1.44
retrieving revision 1.45
diff -u -d -r1.44 -r1.45

--- po.py	10 Feb 2005 19:07:38 -0000	1.44
+++ po.py	17 Feb 2005 17:30:55 -0000	1.45
@@ -336,10 +336,12 @@
 
 class pofile:
   """this represents a .po file containing various poelements"""
-  def __init__(self, inputfile=None):
-    """construct a pofile, optionally reading in from inputfile"""
+  def __init__(self, inputfile=None, encoding=None):
+    """construct a pofile, optionally reading in from inputfile.
+    encoding can be specified but otherwise will be read from the PO header"""
     self.poelements = []
     self.filename = ''
+    self.encoding = encoding
     if inputfile is not None:
       self.parse(inputfile)
 
@@ -411,18 +413,47 @@
     lineitem = ""
     for line in header.msgstr:
       line = getunquotedstr([line]).strip()
-      if not ":" in line:
-        continue
       if line.endswith("\\n"):
         lineitem += line[:-2]
       else:
         lineitem += line
         continue
+      if not ":" in lineitem:
+        continue
       key, value = lineitem.split(":", 1)
       headervalues[key.strip()] = value.strip()
       lineitem = ""
     return headervalues
 
+  def changeencoding(self, newencoding):
+    """changes the encoding on the file"""
+    self.encoding = newencoding
+    if not self.poelements:
+      return
+    header = self.poelements[0]
+    if not (header.isheader() or header.isblank()):
+      return
+    charsetline = None
+    headerstr = unquotefrompo(header.msgstr, True)
+    for line in headerstr.split("\\n"):
+      if not ":" in line: continue
+      key, value = line.strip().split(":", 1)
+      if key.strip() != "Content-Type": continue
+      charsetline = line
+    if charsetline is None:
+      headerstr += "Content-Type: text/plain; charset=%s" % self.encoding
+    else:
+      charset = sre.search("charset=([^ ]*)", charsetline)
+      if charset is None:
+        if not charsetline.strip().endswith(";"):
+          newcharsetline += ";"
+        newcharsetline += " charset=%s" % self.encoding
+      else:
+        charset = charset.group(1)
+        newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self.encoding, 1)
+      headerstr = headerstr.replace(charsetline, newcharsetline, 1)
+    header.msgstr = quoteforpo(headerstr)
+
   def isempty(self):
     """returns whether the po file doesn't contain any definitions..."""
     if len(self.poelements) == 0:
@@ -452,6 +483,15 @@
           start += linesprocessed
           if linesprocessed > 1:
             self.poelements.append(newpe)
+            if self.encoding is None and newpe.isheader():
+              headervalues = self.parseheader()
+              contenttype = headervalues.get("Content-Type", None)
+              if contenttype is not None:
+                charsetmatch = sre.search("charset=([^ ]*)", contenttype)
+                self.encoding = charsetmatch and charsetmatch.group(1)
+                # now that we know the encoding, decode the whole file
+                if self.encoding is not None:
+                  lines = self.decode(lines)
           else:
             finished = 1
       end = end+1
@@ -521,14 +561,23 @@
       lines.extend(pelines)
       # add a line break
       lines.append('\n')
-    return self.utf8encode(lines)
+    return self.encode(lines)
 
-  def utf8encode(self, lines):
-    """encode any unicode strings in lines as utf8"""
+  def encode(self, lines):
+    """encode any unicode strings in lines in self.encoding"""
     newlines = []
     for line in lines:
       if isinstance(line, unicode):
-        line = line.encode("utf8")
+        line = line.encode(self.encoding)
+      newlines.append(line)
+    return newlines
+
+  def decode(self, lines):
+    """decode any non-unicode strings in lines with self.encoding"""
+    newlines = []
+    for line in lines:
+      if isinstance(line, str):
+        line = line.decode(self.encoding)
       newlines.append(line)
     return newlines
 





[Translate-cvs] src/translate/storage po.py,1.44,1.45

Localization tools built by localizers for localizers

[Translate-cvs] src/translate/storage po.py,1.44,1.45