From: <pj...@us...> - 2008-11-24 05:01:07
|
Revision: 5629 http://jython.svn.sourceforge.net/jython/?rev=5629&view=rev Author: pjenvey Date: 2008-11-24 05:01:02 +0000 (Mon, 24 Nov 2008) Log Message: ----------- from: http://svn.python.org/projects/python/branches/release25-maint/Lib/zipfile.py@60117 http://svn.python.org/projects/python/branches/release25-maint/Lib/test/test_zipfile.py@46982 Added Paths: ----------- trunk/jython/Lib/test/test_zipfile.py trunk/jython/Lib/zipfile.py Added: trunk/jython/Lib/test/test_zipfile.py =================================================================== --- trunk/jython/Lib/test/test_zipfile.py (rev 0) +++ trunk/jython/Lib/test/test_zipfile.py 2008-11-24 05:01:02 UTC (rev 5629) @@ -0,0 +1,357 @@ +# We can test part of the module without zlib. +try: + import zlib +except ImportError: + zlib = None + +import zipfile, os, unittest, sys, shutil + +from StringIO import StringIO +from tempfile import TemporaryFile + +from test.test_support import TESTFN, run_unittest + +TESTFN2 = TESTFN + "2" + +class TestsWithSourceFile(unittest.TestCase): + def setUp(self): + line_gen = ("Test of zipfile line %d." % i for i in range(0, 1000)) + self.data = '\n'.join(line_gen) + + # Make a source file with some lines + fp = open(TESTFN, "wb") + fp.write(self.data) + fp.close() + + def zipTest(self, f, compression): + # Create the ZIP archive + zipfp = zipfile.ZipFile(f, "w", compression) + zipfp.write(TESTFN, "another"+os.extsep+"name") + zipfp.write(TESTFN, TESTFN) + zipfp.writestr("strfile", self.data) + zipfp.close() + + # Read the ZIP archive + zipfp = zipfile.ZipFile(f, "r", compression) + self.assertEqual(zipfp.read(TESTFN), self.data) + self.assertEqual(zipfp.read("another"+os.extsep+"name"), self.data) + self.assertEqual(zipfp.read("strfile"), self.data) + + # Print the ZIP directory + fp = StringIO() + stdout = sys.stdout + try: + sys.stdout = fp + + zipfp.printdir() + finally: + sys.stdout = stdout + + directory = fp.getvalue() + lines = directory.splitlines() + self.assertEquals(len(lines), 4) # Number of files + header + + self.assert_('File Name' in lines[0]) + self.assert_('Modified' in lines[0]) + self.assert_('Size' in lines[0]) + + fn, date, time, size = lines[1].split() + self.assertEquals(fn, 'another.name') + # XXX: timestamp is not tested + self.assertEquals(size, str(len(self.data))) + + # Check the namelist + names = zipfp.namelist() + self.assertEquals(len(names), 3) + self.assert_(TESTFN in names) + self.assert_("another"+os.extsep+"name" in names) + self.assert_("strfile" in names) + + # Check infolist + infos = zipfp.infolist() + names = [ i.filename for i in infos ] + self.assertEquals(len(names), 3) + self.assert_(TESTFN in names) + self.assert_("another"+os.extsep+"name" in names) + self.assert_("strfile" in names) + for i in infos: + self.assertEquals(i.file_size, len(self.data)) + + # check getinfo + for nm in (TESTFN, "another"+os.extsep+"name", "strfile"): + info = zipfp.getinfo(nm) + self.assertEquals(info.filename, nm) + self.assertEquals(info.file_size, len(self.data)) + + # Check that testzip doesn't raise an exception + zipfp.testzip() + + + zipfp.close() + + + + + def testStored(self): + for f in (TESTFN2, TemporaryFile(), StringIO()): + self.zipTest(f, zipfile.ZIP_STORED) + + if zlib: + def testDeflated(self): + for f in (TESTFN2, TemporaryFile(), StringIO()): + self.zipTest(f, zipfile.ZIP_DEFLATED) + + def testAbsoluteArcnames(self): + zipfp = zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) + zipfp.write(TESTFN, "/absolute") + zipfp.close() + + zipfp = zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_STORED) + self.assertEqual(zipfp.namelist(), ["absolute"]) + zipfp.close() + + + def tearDown(self): + os.remove(TESTFN) + os.remove(TESTFN2) + +class TestZip64InSmallFiles(unittest.TestCase): + # These tests test the ZIP64 functionality without using large files, + # see test_zipfile64 for proper tests. + + def setUp(self): + self._limit = zipfile.ZIP64_LIMIT + zipfile.ZIP64_LIMIT = 5 + + line_gen = ("Test of zipfile line %d." % i for i in range(0, 1000)) + self.data = '\n'.join(line_gen) + + # Make a source file with some lines + fp = open(TESTFN, "wb") + fp.write(self.data) + fp.close() + + def largeFileExceptionTest(self, f, compression): + zipfp = zipfile.ZipFile(f, "w", compression) + self.assertRaises(zipfile.LargeZipFile, + zipfp.write, TESTFN, "another"+os.extsep+"name") + zipfp.close() + + def largeFileExceptionTest2(self, f, compression): + zipfp = zipfile.ZipFile(f, "w", compression) + self.assertRaises(zipfile.LargeZipFile, + zipfp.writestr, "another"+os.extsep+"name", self.data) + zipfp.close() + + def testLargeFileException(self): + for f in (TESTFN2, TemporaryFile(), StringIO()): + self.largeFileExceptionTest(f, zipfile.ZIP_STORED) + self.largeFileExceptionTest2(f, zipfile.ZIP_STORED) + + def zipTest(self, f, compression): + # Create the ZIP archive + zipfp = zipfile.ZipFile(f, "w", compression, allowZip64=True) + zipfp.write(TESTFN, "another"+os.extsep+"name") + zipfp.write(TESTFN, TESTFN) + zipfp.writestr("strfile", self.data) + zipfp.close() + + # Read the ZIP archive + zipfp = zipfile.ZipFile(f, "r", compression) + self.assertEqual(zipfp.read(TESTFN), self.data) + self.assertEqual(zipfp.read("another"+os.extsep+"name"), self.data) + self.assertEqual(zipfp.read("strfile"), self.data) + + # Print the ZIP directory + fp = StringIO() + stdout = sys.stdout + try: + sys.stdout = fp + + zipfp.printdir() + finally: + sys.stdout = stdout + + directory = fp.getvalue() + lines = directory.splitlines() + self.assertEquals(len(lines), 4) # Number of files + header + + self.assert_('File Name' in lines[0]) + self.assert_('Modified' in lines[0]) + self.assert_('Size' in lines[0]) + + fn, date, time, size = lines[1].split() + self.assertEquals(fn, 'another.name') + # XXX: timestamp is not tested + self.assertEquals(size, str(len(self.data))) + + # Check the namelist + names = zipfp.namelist() + self.assertEquals(len(names), 3) + self.assert_(TESTFN in names) + self.assert_("another"+os.extsep+"name" in names) + self.assert_("strfile" in names) + + # Check infolist + infos = zipfp.infolist() + names = [ i.filename for i in infos ] + self.assertEquals(len(names), 3) + self.assert_(TESTFN in names) + self.assert_("another"+os.extsep+"name" in names) + self.assert_("strfile" in names) + for i in infos: + self.assertEquals(i.file_size, len(self.data)) + + # check getinfo + for nm in (TESTFN, "another"+os.extsep+"name", "strfile"): + info = zipfp.getinfo(nm) + self.assertEquals(info.filename, nm) + self.assertEquals(info.file_size, len(self.data)) + + # Check that testzip doesn't raise an exception + zipfp.testzip() + + + zipfp.close() + + def testStored(self): + for f in (TESTFN2, TemporaryFile(), StringIO()): + self.zipTest(f, zipfile.ZIP_STORED) + + + if zlib: + def testDeflated(self): + for f in (TESTFN2, TemporaryFile(), StringIO()): + self.zipTest(f, zipfile.ZIP_DEFLATED) + + def testAbsoluteArcnames(self): + zipfp = zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED, allowZip64=True) + zipfp.write(TESTFN, "/absolute") + zipfp.close() + + zipfp = zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_STORED) + self.assertEqual(zipfp.namelist(), ["absolute"]) + zipfp.close() + + + def tearDown(self): + zipfile.ZIP64_LIMIT = self._limit + os.remove(TESTFN) + os.remove(TESTFN2) + +class PyZipFileTests(unittest.TestCase): + def testWritePyfile(self): + zipfp = zipfile.PyZipFile(TemporaryFile(), "w") + fn = __file__ + if fn.endswith('.pyc') or fn.endswith('.pyo'): + fn = fn[:-1] + + zipfp.writepy(fn) + + bn = os.path.basename(fn) + self.assert_(bn not in zipfp.namelist()) + self.assert_(bn + 'o' in zipfp.namelist() or bn + 'c' in zipfp.namelist()) + zipfp.close() + + + zipfp = zipfile.PyZipFile(TemporaryFile(), "w") + fn = __file__ + if fn.endswith('.pyc') or fn.endswith('.pyo'): + fn = fn[:-1] + + zipfp.writepy(fn, "testpackage") + + bn = "%s/%s"%("testpackage", os.path.basename(fn)) + self.assert_(bn not in zipfp.namelist()) + self.assert_(bn + 'o' in zipfp.namelist() or bn + 'c' in zipfp.namelist()) + zipfp.close() + + def testWritePythonPackage(self): + import email + packagedir = os.path.dirname(email.__file__) + + zipfp = zipfile.PyZipFile(TemporaryFile(), "w") + zipfp.writepy(packagedir) + + # Check for a couple of modules at different levels of the hieararchy + names = zipfp.namelist() + self.assert_('email/__init__.pyo' in names or 'email/__init__.pyc' in names) + self.assert_('email/mime/text.pyo' in names or 'email/mime/text.pyc' in names) + + def testWritePythonDirectory(self): + os.mkdir(TESTFN2) + try: + fp = open(os.path.join(TESTFN2, "mod1.py"), "w") + fp.write("print 42\n") + fp.close() + + fp = open(os.path.join(TESTFN2, "mod2.py"), "w") + fp.write("print 42 * 42\n") + fp.close() + + fp = open(os.path.join(TESTFN2, "mod2.txt"), "w") + fp.write("bla bla bla\n") + fp.close() + + zipfp = zipfile.PyZipFile(TemporaryFile(), "w") + zipfp.writepy(TESTFN2) + + names = zipfp.namelist() + self.assert_('mod1.pyc' in names or 'mod1.pyo' in names) + self.assert_('mod2.pyc' in names or 'mod2.pyo' in names) + self.assert_('mod2.txt' not in names) + + finally: + shutil.rmtree(TESTFN2) + + + +class OtherTests(unittest.TestCase): + def testCloseErroneousFile(self): + # This test checks that the ZipFile constructor closes the file object + # it opens if there's an error in the file. If it doesn't, the traceback + # holds a reference to the ZipFile object and, indirectly, the file object. + # On Windows, this causes the os.unlink() call to fail because the + # underlying file is still open. This is SF bug #412214. + # + fp = open(TESTFN, "w") + fp.write("this is not a legal zip file\n") + fp.close() + try: + zf = zipfile.ZipFile(TESTFN) + except zipfile.BadZipfile: + os.unlink(TESTFN) + + def testNonExistentFileRaisesIOError(self): + # make sure we don't raise an AttributeError when a partially-constructed + # ZipFile instance is finalized; this tests for regression on SF tracker + # bug #403871. + + # The bug we're testing for caused an AttributeError to be raised + # when a ZipFile instance was created for a file that did not + # exist; the .fp member was not initialized but was needed by the + # __del__() method. Since the AttributeError is in the __del__(), + # it is ignored, but the user should be sufficiently annoyed by + # the message on the output that regression will be noticed + # quickly. + self.assertRaises(IOError, zipfile.ZipFile, TESTFN) + + def testClosedZipRaisesRuntimeError(self): + # Verify that testzip() doesn't swallow inappropriate exceptions. + data = StringIO() + zipf = zipfile.ZipFile(data, mode="w") + zipf.writestr("foo.txt", "O, for a Muse of Fire!") + zipf.close() + + # This is correct; calling .read on a closed ZipFile should throw + # a RuntimeError, and so should calling .testzip. An earlier + # version of .testzip would swallow this exception (and any other) + # and report that the first file in the archive was corrupt. + self.assertRaises(RuntimeError, zipf.testzip) + +def test_main(): + run_unittest(TestsWithSourceFile, TestZip64InSmallFiles, OtherTests, PyZipFileTests) + #run_unittest(TestZip64InSmallFiles) + +if __name__ == "__main__": + test_main() Added: trunk/jython/Lib/zipfile.py =================================================================== --- trunk/jython/Lib/zipfile.py (rev 0) +++ trunk/jython/Lib/zipfile.py 2008-11-24 05:01:02 UTC (rev 5629) @@ -0,0 +1,900 @@ +""" +Read and write ZIP files. +""" +import struct, os, time, sys +import binascii, cStringIO + +try: + import zlib # We may need its compression method +except ImportError: + zlib = None + +__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", + "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ] + +class BadZipfile(Exception): + pass + + +class LargeZipFile(Exception): + """ + Raised when writing a zipfile, the zipfile requires ZIP64 extensions + and those extensions are disabled. + """ + +error = BadZipfile # The exception raised by this module + +ZIP64_LIMIT= (1 << 31) - 1 + +# constants for Zip file compression methods +ZIP_STORED = 0 +ZIP_DEFLATED = 8 +# Other ZIP compression methods not supported + +# Here are some struct module formats for reading headers +structEndArchive = "<4s4H2LH" # 9 items, end of archive, 22 bytes +stringEndArchive = "PK\005\006" # magic number for end of archive record +structCentralDir = "<4s4B4HlLL5HLL"# 19 items, central directory, 46 bytes +stringCentralDir = "PK\001\002" # magic number for central directory +structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes +stringFileHeader = "PK\003\004" # magic number for file header +structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes +stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header +structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes +stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header + + +# indexes of entries in the central directory structure +_CD_SIGNATURE = 0 +_CD_CREATE_VERSION = 1 +_CD_CREATE_SYSTEM = 2 +_CD_EXTRACT_VERSION = 3 +_CD_EXTRACT_SYSTEM = 4 # is this meaningful? +_CD_FLAG_BITS = 5 +_CD_COMPRESS_TYPE = 6 +_CD_TIME = 7 +_CD_DATE = 8 +_CD_CRC = 9 +_CD_COMPRESSED_SIZE = 10 +_CD_UNCOMPRESSED_SIZE = 11 +_CD_FILENAME_LENGTH = 12 +_CD_EXTRA_FIELD_LENGTH = 13 +_CD_COMMENT_LENGTH = 14 +_CD_DISK_NUMBER_START = 15 +_CD_INTERNAL_FILE_ATTRIBUTES = 16 +_CD_EXTERNAL_FILE_ATTRIBUTES = 17 +_CD_LOCAL_HEADER_OFFSET = 18 + +# indexes of entries in the local file header structure +_FH_SIGNATURE = 0 +_FH_EXTRACT_VERSION = 1 +_FH_EXTRACT_SYSTEM = 2 # is this meaningful? +_FH_GENERAL_PURPOSE_FLAG_BITS = 3 +_FH_COMPRESSION_METHOD = 4 +_FH_LAST_MOD_TIME = 5 +_FH_LAST_MOD_DATE = 6 +_FH_CRC = 7 +_FH_COMPRESSED_SIZE = 8 +_FH_UNCOMPRESSED_SIZE = 9 +_FH_FILENAME_LENGTH = 10 +_FH_EXTRA_FIELD_LENGTH = 11 + +def is_zipfile(filename): + """Quickly see if file is a ZIP file by checking the magic number.""" + try: + fpin = open(filename, "rb") + endrec = _EndRecData(fpin) + fpin.close() + if endrec: + return True # file has correct magic number + except IOError: + pass + return False + +def _EndRecData64(fpin, offset, endrec): + """ + Read the ZIP64 end-of-archive records and use that to update endrec + """ + locatorSize = struct.calcsize(structEndArchive64Locator) + fpin.seek(offset - locatorSize, 2) + data = fpin.read(locatorSize) + sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) + if sig != stringEndArchive64Locator: + return endrec + + if diskno != 0 or disks != 1: + raise BadZipfile("zipfiles that span multiple disks are not supported") + + # Assume no 'zip64 extensible data' + endArchiveSize = struct.calcsize(structEndArchive64) + fpin.seek(offset - locatorSize - endArchiveSize, 2) + data = fpin.read(endArchiveSize) + sig, sz, create_version, read_version, disk_num, disk_dir, \ + dircount, dircount2, dirsize, diroffset = \ + struct.unpack(structEndArchive64, data) + if sig != stringEndArchive64: + return endrec + + # Update the original endrec using data from the ZIP64 record + endrec[1] = disk_num + endrec[2] = disk_dir + endrec[3] = dircount + endrec[4] = dircount2 + endrec[5] = dirsize + endrec[6] = diroffset + return endrec + + +def _EndRecData(fpin): + """Return data from the "End of Central Directory" record, or None. + + The data is a list of the nine items in the ZIP "End of central dir" + record followed by a tenth item, the file seek offset of this record.""" + fpin.seek(-22, 2) # Assume no archive comment. + filesize = fpin.tell() + 22 # Get file size + data = fpin.read() + if data[0:4] == stringEndArchive and data[-2:] == "\000\000": + endrec = struct.unpack(structEndArchive, data) + endrec = list(endrec) + endrec.append("") # Append the archive comment + endrec.append(filesize - 22) # Append the record start offset + if endrec[-4] == -1 or endrec[-4] == 0xffffffff: + return _EndRecData64(fpin, -22, endrec) + return endrec + # Search the last END_BLOCK bytes of the file for the record signature. + # The comment is appended to the ZIP file and has a 16 bit length. + # So the comment may be up to 64K long. We limit the search for the + # signature to a few Kbytes at the end of the file for efficiency. + # also, the signature must not appear in the comment. + END_BLOCK = min(filesize, 1024 * 4) + fpin.seek(filesize - END_BLOCK, 0) + data = fpin.read() + start = data.rfind(stringEndArchive) + if start >= 0: # Correct signature string was found + endrec = struct.unpack(structEndArchive, data[start:start+22]) + endrec = list(endrec) + comment = data[start+22:] + if endrec[7] == len(comment): # Comment length checks out + # Append the archive comment and start offset + endrec.append(comment) + endrec.append(filesize - END_BLOCK + start) + if endrec[-4] == -1 or endrec[-4] == 0xffffffff: + return _EndRecData64(fpin, - END_BLOCK + start, endrec) + return endrec + return # Error, return None + + +class ZipInfo (object): + """Class with attributes describing each file in the ZIP archive.""" + + __slots__ = ( + 'orig_filename', + 'filename', + 'date_time', + 'compress_type', + 'comment', + 'extra', + 'create_system', + 'create_version', + 'extract_version', + 'reserved', + 'flag_bits', + 'volume', + 'internal_attr', + 'external_attr', + 'header_offset', + 'CRC', + 'compress_size', + 'file_size', + ) + + def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): + self.orig_filename = filename # Original file name in archive + + # Terminate the file name at the first null byte. Null bytes in file + # names are used as tricks by viruses in archives. + null_byte = filename.find(chr(0)) + if null_byte >= 0: + filename = filename[0:null_byte] + # This is used to ensure paths in generated ZIP files always use + # forward slashes as the directory separator, as required by the + # ZIP format specification. + if os.sep != "/" and os.sep in filename: + filename = filename.replace(os.sep, "/") + + self.filename = filename # Normalized file name + self.date_time = date_time # year, month, day, hour, min, sec + # Standard values: + self.compress_type = ZIP_STORED # Type of compression for the file + self.comment = "" # Comment for each file + self.extra = "" # ZIP extra data + if sys.platform == 'win32': + self.create_system = 0 # System which created ZIP archive + else: + # Assume everything else is unix-y + self.create_system = 3 # System which created ZIP archive + self.create_version = 20 # Version which created ZIP archive + self.extract_version = 20 # Version needed to extract archive + self.reserved = 0 # Must be zero + self.flag_bits = 0 # ZIP flag bits + self.volume = 0 # Volume number of file header + self.internal_attr = 0 # Internal attributes + self.external_attr = 0 # External file attributes + # Other attributes are set by class ZipFile: + # header_offset Byte offset to the file header + # CRC CRC-32 of the uncompressed file + # compress_size Size of the compressed file + # file_size Size of the uncompressed file + + def FileHeader(self): + """Return the per-file header as a string.""" + dt = self.date_time + dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] + dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) + if self.flag_bits & 0x08: + # Set these to zero because we write them after the file data + CRC = compress_size = file_size = 0 + else: + CRC = self.CRC + compress_size = self.compress_size + file_size = self.file_size + + extra = self.extra + + if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: + # File is larger than what fits into a 4 byte integer, + # fall back to the ZIP64 extension + fmt = '<hhqq' + extra = extra + struct.pack(fmt, + 1, struct.calcsize(fmt)-4, file_size, compress_size) + file_size = 0xffffffff # -1 + compress_size = 0xffffffff # -1 + self.extract_version = max(45, self.extract_version) + self.create_version = max(45, self.extract_version) + + header = struct.pack(structFileHeader, stringFileHeader, + self.extract_version, self.reserved, self.flag_bits, + self.compress_type, dostime, dosdate, CRC, + compress_size, file_size, + len(self.filename), len(extra)) + return header + self.filename + extra + + def _decodeExtra(self): + # Try to decode the extra field. + extra = self.extra + unpack = struct.unpack + while extra: + tp, ln = unpack('<hh', extra[:4]) + if tp == 1: + if ln >= 24: + counts = unpack('<qqq', extra[4:28]) + elif ln == 16: + counts = unpack('<qq', extra[4:20]) + elif ln == 8: + counts = unpack('<q', extra[4:12]) + elif ln == 0: + counts = () + else: + raise RuntimeError, "Corrupt extra field %s"%(ln,) + + idx = 0 + + # ZIP64 extension (large files and/or large archives) + if self.file_size == -1 or self.file_size == 0xFFFFFFFFL: + self.file_size = counts[idx] + idx += 1 + + if self.compress_size == -1 or self.compress_size == 0xFFFFFFFFL: + self.compress_size = counts[idx] + idx += 1 + + if self.header_offset == -1 or self.header_offset == 0xffffffffL: + old = self.header_offset + self.header_offset = counts[idx] + idx+=1 + + extra = extra[ln+4:] + + +class ZipFile: + """ Class with methods to open, read, write, close, list zip files. + + z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True) + + file: Either the path to the file, or a file-like object. + If it is a path, the file will be opened and closed by ZipFile. + mode: The mode can be either read "r", write "w" or append "a". + compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). + allowZip64: if True ZipFile will create files with ZIP64 extensions when + needed, otherwise it will raise an exception when this would + be necessary. + + """ + + fp = None # Set here since __del__ checks it + + def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False): + """Open the ZIP file with mode read "r", write "w" or append "a".""" + self._allowZip64 = allowZip64 + self._didModify = False + if compression == ZIP_STORED: + pass + elif compression == ZIP_DEFLATED: + if not zlib: + raise RuntimeError,\ + "Compression requires the (missing) zlib module" + else: + raise RuntimeError, "That compression method is not supported" + self.debug = 0 # Level of printing: 0 through 3 + self.NameToInfo = {} # Find file info given name + self.filelist = [] # List of ZipInfo instances for archive + self.compression = compression # Method of compression + self.mode = key = mode.replace('b', '')[0] + + # Check if we were passed a file-like object + if isinstance(file, basestring): + self._filePassed = 0 + self.filename = file + modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} + self.fp = open(file, modeDict[mode]) + else: + self._filePassed = 1 + self.fp = file + self.filename = getattr(file, 'name', None) + + if key == 'r': + self._GetContents() + elif key == 'w': + pass + elif key == 'a': + try: # See if file is a zip file + self._RealGetContents() + # seek to start of directory and overwrite + self.fp.seek(self.start_dir, 0) + except BadZipfile: # file is not a zip file, just append + self.fp.seek(0, 2) + else: + if not self._filePassed: + self.fp.close() + self.fp = None + raise RuntimeError, 'Mode must be "r", "w" or "a"' + + def _GetContents(self): + """Read the directory, making sure we close the file if the format + is bad.""" + try: + self._RealGetContents() + except BadZipfile: + if not self._filePassed: + self.fp.close() + self.fp = None + raise + + def _RealGetContents(self): + """Read in the table of contents for the ZIP file.""" + fp = self.fp + endrec = _EndRecData(fp) + if not endrec: + raise BadZipfile, "File is not a zip file" + if self.debug > 1: + print endrec + size_cd = endrec[5] # bytes in central directory + offset_cd = endrec[6] # offset of central directory + self.comment = endrec[8] # archive comment + # endrec[9] is the offset of the "End of Central Dir" record + if endrec[9] > ZIP64_LIMIT: + x = endrec[9] - size_cd - 56 - 20 + else: + x = endrec[9] - size_cd + # "concat" is zero, unless zip was concatenated to another file + concat = x - offset_cd + if self.debug > 2: + print "given, inferred, offset", offset_cd, x, concat + # self.start_dir: Position of start of central directory + self.start_dir = offset_cd + concat + fp.seek(self.start_dir, 0) + data = fp.read(size_cd) + fp = cStringIO.StringIO(data) + total = 0 + while total < size_cd: + centdir = fp.read(46) + total = total + 46 + if centdir[0:4] != stringCentralDir: + raise BadZipfile, "Bad magic number for central directory" + centdir = struct.unpack(structCentralDir, centdir) + if self.debug > 2: + print centdir + filename = fp.read(centdir[_CD_FILENAME_LENGTH]) + # Create ZipInfo instance to store file information + x = ZipInfo(filename) + x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) + x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) + total = (total + centdir[_CD_FILENAME_LENGTH] + + centdir[_CD_EXTRA_FIELD_LENGTH] + + centdir[_CD_COMMENT_LENGTH]) + x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + (x.create_version, x.create_system, x.extract_version, x.reserved, + x.flag_bits, x.compress_type, t, d, + x.CRC, x.compress_size, x.file_size) = centdir[1:12] + x.volume, x.internal_attr, x.external_attr = centdir[15:18] + # Convert date/time code to (year, month, day, hour, min, sec) + x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, + t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) + + x._decodeExtra() + x.header_offset = x.header_offset + concat + self.filelist.append(x) + self.NameToInfo[x.filename] = x + if self.debug > 2: + print "total", total + + + def namelist(self): + """Return a list of file names in the archive.""" + l = [] + for data in self.filelist: + l.append(data.filename) + return l + + def infolist(self): + """Return a list of class ZipInfo instances for files in the + archive.""" + return self.filelist + + def printdir(self): + """Print a table of contents for the zip file.""" + print "%-46s %19s %12s" % ("File Name", "Modified ", "Size") + for zinfo in self.filelist: + date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] + print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size) + + def testzip(self): + """Read all the files and check the CRC.""" + for zinfo in self.filelist: + try: + self.read(zinfo.filename) # Check CRC-32 + except BadZipfile: + return zinfo.filename + + + def getinfo(self, name): + """Return the instance of ZipInfo given 'name'.""" + return self.NameToInfo[name] + + def read(self, name): + """Return file bytes (as a string) for name.""" + if self.mode not in ("r", "a"): + raise RuntimeError, 'read() requires mode "r" or "a"' + if not self.fp: + raise RuntimeError, \ + "Attempt to read ZIP archive that was already closed" + zinfo = self.getinfo(name) + filepos = self.fp.tell() + + self.fp.seek(zinfo.header_offset, 0) + + # Skip the file header: + fheader = self.fp.read(30) + if fheader[0:4] != stringFileHeader: + raise BadZipfile, "Bad magic number for file header" + + fheader = struct.unpack(structFileHeader, fheader) + fname = self.fp.read(fheader[_FH_FILENAME_LENGTH]) + if fheader[_FH_EXTRA_FIELD_LENGTH]: + self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH]) + + if fname != zinfo.orig_filename: + raise BadZipfile, \ + 'File name in directory "%s" and header "%s" differ.' % ( + zinfo.orig_filename, fname) + + bytes = self.fp.read(zinfo.compress_size) + self.fp.seek(filepos, 0) + if zinfo.compress_type == ZIP_STORED: + pass + elif zinfo.compress_type == ZIP_DEFLATED: + if not zlib: + raise RuntimeError, \ + "De-compression requires the (missing) zlib module" + # zlib compress/decompress code by Jeremy Hylton of CNRI + dc = zlib.decompressobj(-15) + bytes = dc.decompress(bytes) + # need to feed in unused pad byte so that zlib won't choke + ex = dc.decompress('Z') + dc.flush() + if ex: + bytes = bytes + ex + else: + raise BadZipfile, \ + "Unsupported compression method %d for file %s" % \ + (zinfo.compress_type, name) + crc = binascii.crc32(bytes) + if crc != zinfo.CRC: + raise BadZipfile, "Bad CRC-32 for file %s" % name + return bytes + + def _writecheck(self, zinfo): + """Check for errors before writing a file to the archive.""" + if zinfo.filename in self.NameToInfo: + if self.debug: # Warning for duplicate names + print "Duplicate name:", zinfo.filename + if self.mode not in ("w", "a"): + raise RuntimeError, 'write() requires mode "w" or "a"' + if not self.fp: + raise RuntimeError, \ + "Attempt to write ZIP archive that was already closed" + if zinfo.compress_type == ZIP_DEFLATED and not zlib: + raise RuntimeError, \ + "Compression requires the (missing) zlib module" + if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED): + raise RuntimeError, \ + "That compression method is not supported" + if zinfo.file_size > ZIP64_LIMIT: + if not self._allowZip64: + raise LargeZipFile("Filesize would require ZIP64 extensions") + if zinfo.header_offset > ZIP64_LIMIT: + if not self._allowZip64: + raise LargeZipFile("Zipfile size would require ZIP64 extensions") + + def write(self, filename, arcname=None, compress_type=None): + """Put the bytes from filename into the archive under the name + arcname.""" + st = os.stat(filename) + mtime = time.localtime(st.st_mtime) + date_time = mtime[0:6] + # Create ZipInfo instance to store file information + if arcname is None: + arcname = filename + arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) + while arcname[0] in (os.sep, os.altsep): + arcname = arcname[1:] + zinfo = ZipInfo(arcname, date_time) + zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes + if compress_type is None: + zinfo.compress_type = self.compression + else: + zinfo.compress_type = compress_type + + zinfo.file_size = st.st_size + zinfo.flag_bits = 0x00 + zinfo.header_offset = self.fp.tell() # Start of header bytes + + self._writecheck(zinfo) + self._didModify = True + fp = open(filename, "rb") + # Must overwrite CRC and sizes with correct data later + zinfo.CRC = CRC = 0 + zinfo.compress_size = compress_size = 0 + zinfo.file_size = file_size = 0 + self.fp.write(zinfo.FileHeader()) + if zinfo.compress_type == ZIP_DEFLATED: + cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, + zlib.DEFLATED, -15) + else: + cmpr = None + while 1: + buf = fp.read(1024 * 8) + if not buf: + break + file_size = file_size + len(buf) + CRC = binascii.crc32(buf, CRC) + if cmpr: + buf = cmpr.compress(buf) + compress_size = compress_size + len(buf) + self.fp.write(buf) + fp.close() + if cmpr: + buf = cmpr.flush() + compress_size = compress_size + len(buf) + self.fp.write(buf) + zinfo.compress_size = compress_size + else: + zinfo.compress_size = file_size + zinfo.CRC = CRC + zinfo.file_size = file_size + # Seek backwards and write CRC and file sizes + position = self.fp.tell() # Preserve current position in file + self.fp.seek(zinfo.header_offset + 14, 0) + self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size, + zinfo.file_size)) + self.fp.seek(position, 0) + self.filelist.append(zinfo) + self.NameToInfo[zinfo.filename] = zinfo + + def writestr(self, zinfo_or_arcname, bytes): + """Write a file into the archive. The contents is the string + 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or + the name of the file in the archive.""" + if not isinstance(zinfo_or_arcname, ZipInfo): + zinfo = ZipInfo(filename=zinfo_or_arcname, + date_time=time.localtime(time.time())[:6]) + zinfo.compress_type = self.compression + else: + zinfo = zinfo_or_arcname + zinfo.file_size = len(bytes) # Uncompressed size + zinfo.header_offset = self.fp.tell() # Start of header bytes + self._writecheck(zinfo) + self._didModify = True + zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum + if zinfo.compress_type == ZIP_DEFLATED: + co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, + zlib.DEFLATED, -15) + bytes = co.compress(bytes) + co.flush() + zinfo.compress_size = len(bytes) # Compressed size + else: + zinfo.compress_size = zinfo.file_size + zinfo.header_offset = self.fp.tell() # Start of header bytes + self.fp.write(zinfo.FileHeader()) + self.fp.write(bytes) + self.fp.flush() + if zinfo.flag_bits & 0x08: + # Write CRC and file sizes after the file data + self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size, + zinfo.file_size)) + self.filelist.append(zinfo) + self.NameToInfo[zinfo.filename] = zinfo + + def __del__(self): + """Call the "close()" method in case the user forgot.""" + self.close() + + def close(self): + """Close the file, and for mode "w" and "a" write the ending + records.""" + if self.fp is None: + return + + if self.mode in ("w", "a") and self._didModify: # write ending records + count = 0 + pos1 = self.fp.tell() + for zinfo in self.filelist: # write central directory + count = count + 1 + dt = zinfo.date_time + dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] + dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) + extra = [] + if zinfo.file_size > ZIP64_LIMIT \ + or zinfo.compress_size > ZIP64_LIMIT: + extra.append(zinfo.file_size) + extra.append(zinfo.compress_size) + file_size = 0xffffffff #-1 + compress_size = 0xffffffff #-1 + else: + file_size = zinfo.file_size + compress_size = zinfo.compress_size + + if zinfo.header_offset > ZIP64_LIMIT: + extra.append(zinfo.header_offset) + header_offset = -1 # struct "l" format: 32 one bits + else: + header_offset = zinfo.header_offset + + extra_data = zinfo.extra + if extra: + # Append a ZIP64 field to the extra's + extra_data = struct.pack( + '<hh' + 'q'*len(extra), + 1, 8*len(extra), *extra) + extra_data + + extract_version = max(45, zinfo.extract_version) + create_version = max(45, zinfo.create_version) + else: + extract_version = zinfo.extract_version + create_version = zinfo.create_version + + centdir = struct.pack(structCentralDir, + stringCentralDir, create_version, + zinfo.create_system, extract_version, zinfo.reserved, + zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, + zinfo.CRC, compress_size, file_size, + len(zinfo.filename), len(extra_data), len(zinfo.comment), + 0, zinfo.internal_attr, zinfo.external_attr, + header_offset) + self.fp.write(centdir) + self.fp.write(zinfo.filename) + self.fp.write(extra_data) + self.fp.write(zinfo.comment) + + pos2 = self.fp.tell() + # Write end-of-zip-archive record + if pos1 > ZIP64_LIMIT: + # Need to write the ZIP64 end-of-archive records + zip64endrec = struct.pack( + structEndArchive64, stringEndArchive64, + 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1) + self.fp.write(zip64endrec) + + zip64locrec = struct.pack( + structEndArchive64Locator, + stringEndArchive64Locator, 0, pos2, 1) + self.fp.write(zip64locrec) + + # XXX Why is `pos3` computed next? It's never referenced. + pos3 = self.fp.tell() + endrec = struct.pack(structEndArchive, stringEndArchive, + 0, 0, count, count, pos2 - pos1, -1, 0) + self.fp.write(endrec) + + else: + endrec = struct.pack(structEndArchive, stringEndArchive, + 0, 0, count, count, pos2 - pos1, pos1, 0) + self.fp.write(endrec) + self.fp.flush() + if not self._filePassed: + self.fp.close() + self.fp = None + + +class PyZipFile(ZipFile): + """Class to create ZIP archives with Python library files and packages.""" + + def writepy(self, pathname, basename = ""): + """Add all files from "pathname" to the ZIP archive. + + If pathname is a package directory, search the directory and + all package subdirectories recursively for all *.py and enter + the modules into the archive. If pathname is a plain + directory, listdir *.py and enter all modules. Else, pathname + must be a Python *.py file and the module will be put into the + archive. Added modules are always module.pyo or module.pyc. + This method will compile the module.py into module.pyc if + necessary. + """ + dir, name = os.path.split(pathname) + if os.path.isdir(pathname): + initname = os.path.join(pathname, "__init__.py") + if os.path.isfile(initname): + # This is a package directory, add it + if basename: + basename = "%s/%s" % (basename, name) + else: + basename = name + if self.debug: + print "Adding package in", pathname, "as", basename + fname, arcname = self._get_codename(initname[0:-3], basename) + if self.debug: + print "Adding", arcname + self.write(fname, arcname) + dirlist = os.listdir(pathname) + dirlist.remove("__init__.py") + # Add all *.py files and package subdirectories + for filename in dirlist: + path = os.path.join(pathname, filename) + root, ext = os.path.splitext(filename) + if os.path.isdir(path): + if os.path.isfile(os.path.join(path, "__init__.py")): + # This is a package directory, add it + self.writepy(path, basename) # Recursive call + elif ext == ".py": + fname, arcname = self._get_codename(path[0:-3], + basename) + if self.debug: + print "Adding", arcname + self.write(fname, arcname) + else: + # This is NOT a package directory, add its files at top level + if self.debug: + print "Adding files from directory", pathname + for filename in os.listdir(pathname): + path = os.path.join(pathname, filename) + root, ext = os.path.splitext(filename) + if ext == ".py": + fname, arcname = self._get_codename(path[0:-3], + basename) + if self.debug: + print "Adding", arcname + self.write(fname, arcname) + else: + if pathname[-3:] != ".py": + raise RuntimeError, \ + 'Files added with writepy() must end with ".py"' + fname, arcname = self._get_codename(pathname[0:-3], basename) + if self.debug: + print "Adding file", arcname + self.write(fname, arcname) + + def _get_codename(self, pathname, basename): + """Return (filename, archivename) for the path. + + Given a module name path, return the correct file path and + archive name, compiling if necessary. For example, given + /python/lib/string, return (/python/lib/string.pyc, string). + """ + file_py = pathname + ".py" + file_pyc = pathname + ".pyc" + file_pyo = pathname + ".pyo" + if os.path.isfile(file_pyo) and \ + os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime: + fname = file_pyo # Use .pyo file + elif not os.path.isfile(file_pyc) or \ + os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime: + import py_compile + if self.debug: + print "Compiling", file_py + try: + py_compile.compile(file_py, file_pyc, None, True) + except py_compile.PyCompileError,err: + print err.msg + fname = file_pyc + else: + fname = file_pyc + archivename = os.path.split(fname)[1] + if basename: + archivename = "%s/%s" % (basename, archivename) + return (fname, archivename) + + +def main(args = None): + import textwrap + USAGE=textwrap.dedent("""\ + Usage: + zipfile.py -l zipfile.zip # Show listing of a zipfile + zipfile.py -t zipfile.zip # Test if a zipfile is valid + zipfile.py -e zipfile.zip target # Extract zipfile into target dir + zipfile.py -c zipfile.zip src ... # Create zipfile from sources + """) + if args is None: + args = sys.argv[1:] + + if not args or args[0] not in ('-l', '-c', '-e', '-t'): + print USAGE + sys.exit(1) + + if args[0] == '-l': + if len(args) != 2: + print USAGE + sys.exit(1) + zf = ZipFile(args[1], 'r') + zf.printdir() + zf.close() + + elif args[0] == '-t': + if len(args) != 2: + print USAGE + sys.exit(1) + zf = ZipFile(args[1], 'r') + zf.testzip() + print "Done testing" + + elif args[0] == '-e': + if len(args) != 3: + print USAGE + sys.exit(1) + + zf = ZipFile(args[1], 'r') + out = args[2] + for path in zf.namelist(): + if path.startswith('./'): + tgt = os.path.join(out, path[2:]) + else: + tgt = os.path.join(out, path) + + tgtdir = os.path.dirname(tgt) + if not os.path.exists(tgtdir): + os.makedirs(tgtdir) + fp = open(tgt, 'wb') + fp.write(zf.read(path)) + fp.close() + zf.close() + + elif args[0] == '-c': + if len(args) < 3: + print USAGE + sys.exit(1) + + def addToZip(zf, path, zippath): + if os.path.isfile(path): + zf.write(path, zippath, ZIP_DEFLATED) + elif os.path.isdir(path): + for nm in os.listdir(path): + addToZip(zf, + os.path.join(path, nm), os.path.join(zippath, nm)) + # else: ignore + + zf = ZipFile(args[1], 'w', allowZip64=True) + for src in args[2:]: + addToZip(zf, src, os.path.basename(src)) + + zf.close() + +if __name__ == "__main__": + main() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |