SF.net SVN: matplotlib: [3802] trunk/matplotlib

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 3802
          http://matplotlib.svn.sourceforge.net/matplotlib/?rev=3802&view=rev
Author:   jouni
Date:     2007-09-06 13:13:11 -0700 (Thu, 06 Sep 2007)

Log Message:
-----------
A more careful reading of the PDF spec reveals that Type 1 fonts
embedded in PDF files are not actually supposed to be in PFA format,
but in a similar format where the encrypted part is not encoded in
hexadecimal, and where the "fixed-content" part at the end may be
omitted. This fixes the problems with Preview.app.

Modified Paths:
--------------
    trunk/matplotlib/API_CHANGES
    trunk/matplotlib/CHANGELOG
    trunk/matplotlib/lib/matplotlib/backends/backend_pdf.py
    trunk/matplotlib/lib/matplotlib/type1font.py

Modified: trunk/matplotlib/API_CHANGES
===================================================================

--- trunk/matplotlib/API_CHANGES	2007-09-06 19:36:34 UTC (rev 3801)
+++ trunk/matplotlib/API_CHANGES	2007-09-06 20:13:11 UTC (rev 3802)
@@ -7,9 +7,9 @@
 
     The file type1font.py contains a new class for Type 1 fonts.
     Currently it simply reads pfa and pfb format files and stores the
-    data in pfa format, which is the format for embedding Type 1 fonts
-    in postscript and pdf files. In the future the class might
-    actually parse the font to allow e.g. subsetting.
+    data in a way that is suitable for embedding in pdf files. In the
+    future the class might actually parse the font to allow e.g.
+    subsetting.
 
     FT2Font now supports FT_Attach_File. In practice this can be used
     to read an afm file in addition to a pfa/pfb file, to get metrics

Modified: trunk/matplotlib/CHANGELOG
===================================================================
--- trunk/matplotlib/CHANGELOG	2007-09-06 19:36:34 UTC (rev 3801)
+++ trunk/matplotlib/CHANGELOG	2007-09-06 20:13:11 UTC (rev 3802)
@@ -1,3 +1,6 @@
+2007-09-06 Fixed a bug in the embedding of Type 1 fonts in PDF.
+           Now it doesn't crash Preview.app. - JKS
+
 2007-09-06 Refactored image saving code so that all GUI backends can
 	   save most image types.  See FILETYPES for a matrix of
 	   backends and their supported file types. 

Modified: trunk/matplotlib/lib/matplotlib/backends/backend_pdf.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/backends/backend_pdf.py	2007-09-06 19:36:34 UTC (rev 3801)
+++ trunk/matplotlib/lib/matplotlib/backends/backend_pdf.py	2007-09-06 20:13:11 UTC (rev 3802)
@@ -607,13 +607,13 @@
         self.writeObject(widthsObject, widths)
         self.writeObject(fontdescObject, descriptor)
 
-        fontdata = type1font.Type1Font(filename)
-        len1, len2, len3 = fontdata.lengths()
+        t1font = type1font.Type1Font(filename)
         self.beginStream(fontfileObject.id, None,
-                         { 'Length1': len1,
-                           'Length2': len2,
-                           'Length3': len3 })
-        self.currentstream.write(fontdata.data)
+                         { 'Length1': len(t1font.parts[0]),
+                           'Length2': len(t1font.parts[1]),
+                           'Length3': 0 })
+        self.currentstream.write(t1font.parts[0])
+        self.currentstream.write(t1font.parts[1])
         self.endStream()
 
         return fontdictObject

Modified: trunk/matplotlib/lib/matplotlib/type1font.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/type1font.py	2007-09-06 19:36:34 UTC (rev 3801)
+++ trunk/matplotlib/lib/matplotlib/type1font.py	2007-09-06 20:13:11 UTC (rev 3802)
@@ -1,9 +1,10 @@
 """
 A class representing a Type 1 font.
 
-This version merely allows reading in pfa and pfb files, and stores
-the data in pfa format (which can be embedded in PostScript or PDF
-files). A more complete class might support subsetting.
+This version merely allows reading in pfa and pfb files, stores the
+data in pfa format, and allows reading the parts of the data in a
+format suitable for embedding in pdf files. A more complete class
+might support subsetting.
 
 Usage:  font = Type1Font(filename)
         somefile.write(font.data) # writes out font in pfa format
@@ -23,9 +24,10 @@
     def __init__(self, filename):
         file = open(filename, 'rb')
         try:
-            self._read(file)
+            data = self._read(file)
         finally:
             file.close()
+        self.parts = self._split(data)
 
     def _read(self, file):
         rawdata = file.read()
@@ -33,7 +35,7 @@
             self.data = rawdata
             return
         
-        self.data = ''
+        data = ''
         while len(rawdata) > 0:
             if not rawdata.startswith(chr(128)):
                 raise RuntimeError, \
@@ -46,9 +48,9 @@
                 rawdata = rawdata[6+length:]
 
             if type == 1:       # ASCII text: include verbatim
-                self.data += segment
+                data += segment
             elif type == 2:     # binary data: encode in hexadecimal
-                self.data += ''.join(['%02x' % ord(char)
+                data += ''.join(['%02x' % ord(char)
                                       for char in segment])
             elif type == 3:     # end of file
                 break
@@ -56,9 +58,11 @@
                 raise RuntimeError, \
                     'Unknown segment type %d in pfb file' % type
 
-    def lengths(self):
+        return data
+
+    def _split(self, data):
         """
-        Compute the lengths of the three parts of a Type 1 font.
+        Split the Type 1 font into its three main parts.
 
         The three parts are: (1) the cleartext part, which ends in a
         eexec operator; (2) the encrypted part; (3) the fixed part,
@@ -66,28 +70,33 @@
         lines, a cleartomark operator, and possibly something else.
         """
 
-        # Cleartext part: just find the eexec and skip the eol char(s)
-        idx = self.data.index('eexec')
+        # Cleartext part: just find the eexec and skip whitespace
+        idx = data.index('eexec')
         idx += len('eexec')
-        while self.data[idx] in ('\n', '\r'):
+        while data[idx] in ' \t\r\n':
             idx += 1
         len1 = idx
 
         # Encrypted part: find the cleartomark operator and count
         # zeros backward
-        idx = self.data.rindex('cleartomark') - 1
+        idx = data.rindex('cleartomark') - 1
         zeros = 512
-        while zeros and self.data[idx] in ('0', '\n', '\r'):
-            if self.data[idx] == '0':
+        while zeros and data[idx] in ('0', '\n', '\r'):
+            if data[idx] == '0':
                 zeros -= 1
             idx -= 1
         if zeros:
             raise RuntimeError, 'Insufficiently many zeros in Type 1 font'
 
-        len2 = idx - len1
-        len3 = len(self.data) - idx
+        # Convert encrypted part to binary (if we read a pfb file, we
+        # may end up converting binary to hexadecimal to binary again;
+        # but if we read a pfa file, this part is already in hex, and
+        # I am not quite sure if even the pfb format guarantees that
+        # it will be in binary).
+        binary = ''.join([chr(int(data[i:i+2], 16))
+                          for i in range(len1, idx, 2)])
 
-        return len1, len2, len3
+        return data[:len1], binary, data[idx:]
             
 if __name__ == '__main__':
     import sys


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.