From: <lu...@us...> - 2008-02-27 10:57:16
Revision: 166
          http://s3tools.svn.sourceforge.net/s3tools/?rev=166&view=rev
Author:   ludvigm
Date:     2008-02-27 02:57:11 -0800 (Wed, 27 Feb 2008)

Log Message:
-----------
* S3/S3.py: modify the 'x-amz-date' format (problems reported on MacOS X).
  Thanks to Jon Larkowski for the fix.

Modified Paths:
--------------
    s3cmd/trunk/ChangeLog
    s3cmd/trunk/S3/S3.py

Modified: s3cmd/trunk/ChangeLog
===================================================================
--- s3cmd/trunk/ChangeLog	2008-02-27 10:45:59 UTC (rev 165)
+++ s3cmd/trunk/ChangeLog	2008-02-27 10:57:11 UTC (rev 166)
@@ -5,6 +5,8 @@
     * s3cmd: Fix crash when 'sync'ing files with unresolvable owner uid/gid.
     * S3/S3.py, S3/Utils.py: open files in binary mode (otherwise windows
       users have problems).
+    * S3/S3.py: modify 'x-amz-date' format (problems reported on MacOS X).
+      Thanks Jon Larkowski for fix.
 
 2008-02-27  Michal Ludvig  <mi...@lo...>

Modified: s3cmd/trunk/S3/S3.py
===================================================================
--- s3cmd/trunk/S3/S3.py	2008-02-27 10:45:59 UTC (rev 165)
+++ s3cmd/trunk/S3/S3.py	2008-02-27 10:57:11 UTC (rev 166)
@@ -317,7 +317,7 @@
             del(headers["date"])
         if not headers.has_key("x-amz-date"):
-            headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S %z", time.gmtime(time.time()))
+            headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
         method_string = S3.http_methods.getkey(S3.operations[operation] & S3.http_methods["MASK"])
         signature = self.sign_headers(method_string, resource, headers)
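A note on the fix itself: Python's strftime() hands %z to the C library, which fills it from the process-local timezone rather than from the struct_time argument, so combining %z with time.gmtime() can emit a non-GMT offset on some platforms (hence the MacOS X report above). Since gmtime() is UTC by definition, the offset can simply be hardcoded. A minimal sketch of the before/after behaviour:

    import time

    # %z ignores the struct_time and reports the local timezone,
    # which is wrong for a gmtime() timestamp on a non-GMT machine:
    broken = time.strftime("%a, %d %b %Y %H:%M:%S %z", time.gmtime())

    # gmtime() is always UTC, so '+0000' is safe to hardcode:
    fixed = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
    # e.g. 'Wed, 27 Feb 2008 10:57:11 +0000'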
From: <lu...@us...> - 2008-02-27 11:09:26
Revision: 167
          http://s3tools.svn.sourceforge.net/s3tools/?rev=167&view=rev
Author:   ludvigm
Date:     2008-02-27 03:09:21 -0800 (Wed, 27 Feb 2008)

Log Message:
-----------
* S3/PkgInfo.py: bumped up version to 0.9.6
* NEWS: What's new in 0.9.6

Modified Paths:
--------------
    s3cmd/trunk/ChangeLog
    s3cmd/trunk/NEWS
    s3cmd/trunk/S3/PkgInfo.py

Modified: s3cmd/trunk/ChangeLog
===================================================================
--- s3cmd/trunk/ChangeLog	2008-02-27 10:57:11 UTC (rev 166)
+++ s3cmd/trunk/ChangeLog	2008-02-27 11:09:21 UTC (rev 167)
@@ -1,3 +1,13 @@
+2008-02-28  Michal Ludvig  <mi...@lo...>
+
+    * Released version 0.9.5
+    ----------------------
+
+2008-02-28  Michal Ludvig  <mi...@lo...>
+
+    * S3/PkgInfo.py: bumped up version to 0.9.6
+    * NEWS: What's new in 0.9.6
+
 2008-02-27  Michal Ludvig  <mi...@lo...>
 
     * s3cmd, s3cmd.1: Updated help and man page.

Modified: s3cmd/trunk/NEWS
===================================================================
--- s3cmd/trunk/NEWS	2008-02-27 10:57:11 UTC (rev 166)
+++ s3cmd/trunk/NEWS	2008-02-27 11:09:21 UTC (rev 167)
@@ -1,3 +1,13 @@
+s3cmd 0.9.6 - 2008-02-28
+===========
+* Support for setting / guessing MIME-type of uploaded file
+* Correctly follow redirects when accessing buckets created
+  in Europe.
+* Introduced 'info' command both for buckets and objects
+* Correctly display public URL on uploads
+* Updated TODO list for everyone to see where we're heading
+* Various small fixes. See ChangeLog for details.
+
 s3cmd 0.9.5 - 2007-11-13
 ===========
 * Support for buckets created in Europe

Modified: s3cmd/trunk/S3/PkgInfo.py
===================================================================
--- s3cmd/trunk/S3/PkgInfo.py	2008-02-27 10:57:11 UTC (rev 166)
+++ s3cmd/trunk/S3/PkgInfo.py	2008-02-27 11:09:21 UTC (rev 167)
@@ -1,5 +1,5 @@
 package = "s3cmd"
-version = "0.9.5"
+version = "0.9.6"
 url = "http://s3tools.logix.cz"
 license = "GPL version 2"
 short_description = "S3cmd is a tool for managing Amazon S3 storage space."
From: <lu...@us...> - 2008-03-04 12:48:18
Revision: 172
          http://s3tools.svn.sourceforge.net/s3tools/?rev=172&view=rev
Author:   ludvigm
Date:     2008-03-04 04:48:10 -0800 (Tue, 04 Mar 2008)

Log Message:
-----------
* s3cmd, S3/S3.py, S3/Utils.py: Throttle upload speed and retry when an
  upload fails. Report download/upload speed and time elapsed.

Modified Paths:
--------------
    s3cmd/trunk/ChangeLog
    s3cmd/trunk/S3/S3.py
    s3cmd/trunk/S3/Utils.py
    s3cmd/trunk/s3cmd

Modified: s3cmd/trunk/ChangeLog
===================================================================
--- s3cmd/trunk/ChangeLog	2008-03-01 09:20:00 UTC (rev 171)
+++ s3cmd/trunk/ChangeLog	2008-03-04 12:48:10 UTC (rev 172)
@@ -1,3 +1,9 @@
+2008-03-05  Michal Ludvig  <mi...@lo...>
+
+    * s3cmd, S3/S3.py, S3/Utils.py: Throttle upload speed and retry
+      when upload failed.
+      Report download/upload speed and time elapsed.
+
 2008-02-28  Michal Ludvig  <mi...@lo...>
 
     * Released version 0.9.6

Modified: s3cmd/trunk/S3/S3.py
===================================================================
--- s3cmd/trunk/S3/S3.py	2008-03-01 09:20:00 UTC (rev 171)
+++ s3cmd/trunk/S3/S3.py	2008-03-04 12:48:10 UTC (rev 172)
@@ -44,6 +44,9 @@
         pass
     return retval
 
+class S3UploadError(Exception):
+    pass
+
 class ParameterError(Exception):
     pass
 
@@ -199,7 +202,6 @@
             headers["x-amz-acl"] = "public-read"
         request = self.create_request("OBJECT_PUT", bucket = bucket, object = object, headers = headers)
         response = self.send_file(request, file)
-        response["size"] = size
         return response
 
     def object_get_file(self, bucket, object, filename):
@@ -359,7 +361,7 @@
             raise S3Error(response)
         return response
 
-    def send_file(self, request, file):
+    def send_file(self, request, file, throttle = 0, retries = 3):
        method_string, resource, headers = request
        info("Sending file '%s', please wait..." % file.name)
        conn = self.get_connection(resource['bucket'])
@@ -369,23 +371,43 @@
            conn.putheader(header, str(headers[header]))
        conn.endheaders()
        file.seek(0)
+       timestamp_start = time.time()
        size_left = size_total = headers.get("content-length")
        while (size_left > 0):
            debug("SendFile: Reading up to %d bytes from '%s'" % (self.config.send_chunk, file.name))
            data = file.read(self.config.send_chunk)
            debug("SendFile: Sending %d bytes to the server" % len(data))
-           conn.send(data)
+           try:
+               conn.send(data)
+           except Exception, e:
+               ## When an exception occurs insert a
+               if retries:
+                   conn.close()
+                   warning("Upload of '%s' failed %s " % (file.name, e))
+                   throttle = throttle and throttle * 5 or 0.01
+                   warning("Retrying on lower speed (throttle=%0.2f)" % throttle)
+                   return self.send_file(request, file, throttle, retries - 1)
+               else:
+                   debug("Giving up on '%s' %s" % (file.name, e))
+                   raise S3UploadError
+
            size_left -= len(data)
+           if throttle:
+               time.sleep(throttle)
            info("Sent %d bytes (%d %% of %d)" % (
                (size_total - size_left),
                (size_total - size_left) * 100 / size_total,
                size_total))
+       timestamp_end = time.time()
        response = {}
        http_response = conn.getresponse()
        response["status"] = http_response.status
        response["reason"] = http_response.reason
        response["headers"] = convertTupleListToDict(http_response.getheaders())
-       response["data"] = http_response.read()
+       response["data"] = http_response.read()
+       response["elapsed"] = timestamp_end - timestamp_start
+       response["size"] = size_total
+       response["speed"] = float(response["size"]) / response["elapsed"]
        conn.close()
 
        if response["status"] == 307:
@@ -430,6 +452,7 @@
        md5_hash = md5.new()
        size_left = size_total = int(response["headers"]["content-length"])
        size_recvd = 0
+       timestamp_start = time.time()
        while (size_recvd < size_total):
            this_chunk = size_left > self.config.recv_chunk and self.config.recv_chunk or size_left
            debug("ReceiveFile: Receiving up to %d bytes from the server" % this_chunk)
@@ -443,9 +466,12 @@
                size_recvd * 100 / size_total,
                size_total))
        conn.close()
+       timestamp_end = time.time()
        response["md5"] = md5_hash.hexdigest()
        response["md5match"] = response["headers"]["etag"].find(response["md5"]) >= 0
+       response["elapsed"] = timestamp_end - timestamp_start
        response["size"] = size_recvd
+       response["speed"] = float(response["size"]) / response["elapsed"]
        if response["size"] != long(response["headers"]["content-length"]):
            warning("Reported size (%s) does not match received size (%s)" % (
                response["headers"]["content-length"], response["size"]))

Modified: s3cmd/trunk/S3/Utils.py
===================================================================
--- s3cmd/trunk/S3/Utils.py	2008-03-01 09:20:00 UTC (rev 171)
+++ s3cmd/trunk/S3/Utils.py	2008-03-04 12:48:10 UTC (rev 172)
@@ -79,8 +79,8 @@
     ## treats it as "localtime". Anyway...
     return time.mktime(dateS3toPython(date))
 
-def formatSize(size, human_readable = False):
-    size = int(size)
+def formatSize(size, human_readable = False, floating_point = False):
+    size = floating_point and float(size) or int(size)
     if human_readable:
         coeffs = ['k', 'M', 'G', 'T']
         coeff = ""

Modified: s3cmd/trunk/s3cmd
===================================================================
--- s3cmd/trunk/s3cmd	2008-03-01 09:20:00 UTC (rev 171)
+++ s3cmd/trunk/s3cmd	2008-03-04 12:48:10 UTC (rev 172)
@@ -190,9 +190,14 @@
         real_filename = file
         if Config().encrypt:
             exitcode, real_filename, extra_headers["x-amz-meta-s3tools-gpgenc"] = gpg_encrypt(file)
-        response = s3.object_put_uri(real_filename, uri_final, extra_headers)
-        output("File '%s' stored as %s (%d bytes) [%d of %d]" %
-            (file, uri_final, response["size"],
+        try:
+            response = s3.object_put_uri(real_filename, uri_final, extra_headers)
+        except S3UploadError, e:
+            error("Upload of '%s' failed too many times. Skipping that file." % real_filename)
+            continue
+        speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True)
+        output("File '%s' stored as %s (%d bytes in %0.1f seconds, %0.2f %sB/s) [%d of %d]" %
+            (file, uri_final, response["size"], response["elapsed"], speed_fmt[0], speed_fmt[1],
             seq, total))
         if Config().acl_public:
             output("Public URL of the object is: %s" %
@@ -246,8 +251,9 @@
             gpg_decrypt(destination, response["headers"]["x-amz-meta-s3tools-gpgenc"])
             response["size"] = os.stat(destination)[6]
         if destination != "-":
-            output("Object %s saved as '%s' (%d bytes)" %
-                (uri, destination, response["size"]))
+            speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True)
+            output("Object %s saved as '%s' (%d bytes in %0.1f seconds, %0.2f %sB/s)" %
+                (uri, destination, response["size"], response["elapsed"], speed_fmt[0], speed_fmt[1]))
 
 def cmd_object_del(args):
     s3 = S3(Config())
@@ -413,6 +419,8 @@
 
     total_size = 0
     total_count = len(loc_list)
+    total_elapsed = 0.0
+    timestamp_start = time.time()
     seq = 0
     dst_base = dst_uri.uri()
     if not dst_base[-1] == "/": dst_base += "/"
@@ -425,11 +433,22 @@
         if cfg.preserve_attrs:
             attr_header = _build_attr_header(src)
             debug(attr_header)
-        response = s3.object_put_uri(src, uri, attr_header)
-        output("stored '%s' as '%s' (%d bytes) [%d of %d]" % (src, uri, response["size"], seq, total_count))
+        try:
+            response = s3.object_put_uri(src, uri, attr_header)
+        except S3UploadError, e:
+            error("Upload of '%s' failed too many times. Skipping that file." % src)
+            continue
+        speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True)
+        output("File '%s' stored as %s (%d bytes in %0.1f seconds, %0.2f %sB/s) [%d of %d]" %
+            (src, uri, response["size"], response["elapsed"], speed_fmt[0], speed_fmt[1],
            seq, total_count))
         total_size += response["size"]
 
-    output("Done. Uploaded %d bytes." % total_size)
+    total_elapsed = time.time() - timestamp_start
+    speed_fmt = formatSize(total_size/total_elapsed, human_readable = True, floating_point = True)
+    output("Done. Uploaded %d bytes in %0.1f seconds, %0.2f %sB/s" %
+        (total_size, total_elapsed, speed_fmt[0], speed_fmt[1]))
+
 def resolve_list(lst, args):
     retval = []
     for item in lst:
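The retry policy is worth spelling out: the first send failure merely turns throttling on (10 ms between chunks), and each further failure multiplies the delay by five before the whole upload restarts, up to `retries` attempts. A standalone sketch of the same idea; `send` is a hypothetical callable standing in for conn.send():

    import time

    def send_with_throttle(chunks, send, throttle = 0.0, retries = 3):
        # 'chunks' must be a re-iterable sequence (e.g. a list), since a
        # failure restarts the transfer from the beginning, just like the
        # recursive send_file() call above.
        for data in chunks:
            try:
                send(data)
            except Exception:
                if not retries:
                    raise
                # off -> 0.01s, then 0.05s, 0.25s, ... between chunks
                throttle = throttle and throttle * 5 or 0.01
                return send_with_throttle(chunks, send, throttle, retries - 1)
            if throttle:
                time.sleep(throttle)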
From: <lu...@us...> - 2008-04-28 14:32:44
Revision: 174
          http://s3tools.svn.sourceforge.net/s3tools/?rev=174&view=rev
Author:   ludvigm
Date:     2008-04-28 07:32:22 -0700 (Mon, 28 Apr 2008)

Log Message:
-----------
* S3/S3.py: send_file() now computes the MD5 sum of the file being
  uploaded, compares it with the ETag returned by Amazon, and retries
  the upload if they don't match.

Modified Paths:
--------------
    s3cmd/trunk/ChangeLog
    s3cmd/trunk/S3/S3.py

Modified: s3cmd/trunk/ChangeLog
===================================================================
--- s3cmd/trunk/ChangeLog	2008-04-28 14:31:35 UTC (rev 173)
+++ s3cmd/trunk/ChangeLog	2008-04-28 14:32:22 UTC (rev 174)
@@ -1,3 +1,9 @@
+2008-04-28  Michal Ludvig  <mi...@lo...>
+
+    * S3/S3.py: send_file() now computes MD5 sum of the file
+      being uploaded, compares with ETag returned by Amazon
+      and retries upload if they don't match.
+
 2008-03-05  Michal Ludvig  <mi...@lo...>
 
     * s3cmd, S3/S3.py, S3/Utils.py: Throttle upload speed and retry

Modified: s3cmd/trunk/S3/S3.py
===================================================================
--- s3cmd/trunk/S3/S3.py	2008-04-28 14:31:35 UTC (rev 173)
+++ s3cmd/trunk/S3/S3.py	2008-04-28 14:32:22 UTC (rev 174)
@@ -372,10 +372,12 @@
        conn.endheaders()
        file.seek(0)
        timestamp_start = time.time()
+       md5_hash = md5.new()
        size_left = size_total = headers.get("content-length")
        while (size_left > 0):
            debug("SendFile: Reading up to %d bytes from '%s'" % (self.config.send_chunk, file.name))
            data = file.read(self.config.send_chunk)
+           md5_hash.update(data)
            debug("SendFile: Sending %d bytes to the server" % len(data))
            try:
                conn.send(data)
@@ -399,6 +401,7 @@
                (size_total - size_left) * 100 / size_total,
                size_total))
        timestamp_end = time.time()
+       md5_computed = md5_hash.hexdigest()
        response = {}
        http_response = conn.getresponse()
        response["status"] = http_response.status
@@ -418,6 +421,16 @@
            info("Redirected to: %s" % (redir_hostname))
            return self.send_file(request, file)
 
+       debug("MD5 sums: computed=%s, received=%s" % (md5_computed, response["headers"]["etag"]))
+       if response["headers"]["etag"].strip('"\'') != md5_hash.hexdigest():
+           warning("MD5 Sums don't match!")
+           if retries:
+               info("Retrying upload.")
+               return self.send_file(request, file, throttle, retries - 1)
+           else:
+               debug("Too many failures. Giving up on '%s'" % (file.name))
+               raise S3UploadError
+
        if response["status"] < 200 or response["status"] > 299:
            raise S3Error(response)
        return response
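The check relies on the fact that for a plain single-part PUT, S3 returns the object's MD5 digest as the ETag header, wrapped in quotes. A standalone version of the comparison, using hashlib in place of the long-deprecated md5 module:

    import hashlib

    def file_md5(path, chunk_size = 4096):
        # Hash in chunks so large uploads need no extra memory.
        h = hashlib.md5()
        f = open(path, "rb")
        try:
            data = f.read(chunk_size)
            while data:
                h.update(data)
                data = f.read(chunk_size)
        finally:
            f.close()
        return h.hexdigest()

    def etag_matches(md5_hex, etag):
        # S3 quotes the ETag value ('"abc..."'), so strip before comparing.
        return etag.strip('"\'') == md5_hex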
From: <lu...@us...> - 2008-04-29 09:39:47
Revision: 175
          http://s3tools.svn.sourceforge.net/s3tools/?rev=175&view=rev
Author:   ludvigm
Date:     2008-04-29 02:39:39 -0700 (Tue, 29 Apr 2008)

Log Message:
-----------
2008-04-29  Michal Ludvig  <mi...@lo...>

    * S3/SortedDict.py: rewritten from scratch to preserve the case
      of keys while still sorting case-insensitively.

Modified Paths:
--------------
    s3cmd/trunk/ChangeLog
    s3cmd/trunk/S3/SortedDict.py

Modified: s3cmd/trunk/ChangeLog
===================================================================
--- s3cmd/trunk/ChangeLog	2008-04-28 14:32:22 UTC (rev 174)
+++ s3cmd/trunk/ChangeLog	2008-04-29 09:39:39 UTC (rev 175)
@@ -1,3 +1,8 @@
+2008-04-29  Michal Ludvig  <mi...@lo...>
+
+    * S3/SortedDict.py: rewritten from scratch to preserve
+      case of keys while still sorting in case-ignore mode.
+
 2008-04-28  Michal Ludvig  <mi...@lo...>
 
     * S3/S3.py: send_file() now computes MD5 sum of the file

Modified: s3cmd/trunk/S3/SortedDict.py
===================================================================
--- s3cmd/trunk/S3/SortedDict.py	2008-04-28 14:32:22 UTC (rev 174)
+++ s3cmd/trunk/S3/SortedDict.py	2008-04-29 09:39:39 UTC (rev 175)
@@ -3,45 +3,53 @@
 ## http://www.logix.cz/michal
 ## License: GPL Version 2
 
+from BidirMap import BidirMap
+
 class SortedDictIterator(object):
-    def __init__(self, dict):
-        self.dict = dict
-        self.keys = dict.keys()
-        self.index = 0
-        self.length = len(self.keys)
+    def __init__(self, sorted_dict, keys):
+        self.sorted_dict = sorted_dict
+        self.keys = keys
 
     def next(self):
-        if self.length <= self.index:
+        try:
+            return self.keys.pop(0)
+        except IndexError:
             raise StopIteration
-        retval = self.keys[self.index]
-        self.index += 1
-        return retval
-
-
 class SortedDict(dict):
-    def __setitem__(self, name, value):
-        try:
-            value = value.strip()
-        except:
-            pass
-        dict.__setitem__(self, name.lower(), value)
+    keys_sort_lowercase = True
 
-    def __iter__(self):
-        return SortedDictIterator(self)
-
     def keys(self):
         keys = dict.keys(self)
-        keys.sort()
-        return keys
-
-    def popitem(self):
-        keys = self.keys()
-        if len(keys) < 1:
-            raise KeyError("popitem(): dictionary is empty")
-        retval = (keys[0], dict.__getitem__(self, keys[0]))
-        dict.__delitem__(self, keys[0])
-        return retval
+        if self.keys_sort_lowercase:
+            # Translation map
+            xlat_map = BidirMap()
+            for key in keys:
+                xlat_map[key.lower()] = key
+            # Lowercase keys
+            lc_keys = xlat_map.keys()
+            lc_keys.sort()
+            return [xlat_map[k] for k in lc_keys]
+        else:
+            keys.sort()
+            return keys
 
+    def __iter__(self):
+        return SortedDictIterator(self, self.keys())
 
+if __name__ == "__main__":
+    d = SortedDict()
+    d['AWS'] = 1
+    d['Action'] = 2
+    d['america'] = 3
+    d.keys_sort_lowercase = True
+    print "Wanted: Action, america, AWS,"
+    print "Got:    ",
+    for key in d:
+        print "%s," % key,
+    print "  __iter__()"
+    d.keys_return_lowercase = True
+    print "Got:    ",
+    for key in d.keys():
+        print "%s," % key,
+    print "  keys()"
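The BidirMap indirection exists because keys() must hand back the original spellings while ordering by the lowercased form ('Action', 'america', 'AWS' rather than 'AWS', 'Action', 'america'). On Python 2.4 and later, the same ordering rule can be expressed directly with sorted()'s key argument; a sketch:

    def keys_sorted_ci(keys):
        # Case-insensitive order, original case preserved.
        return sorted(keys, key = str.lower)

    keys_sorted_ci(['AWS', 'Action', 'america'])
    # -> ['Action', 'america', 'AWS']

Unlike the translation-map approach, this also keeps keys that differ only in case, which the lowercased BidirMap would silently collapse into one entry.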
From: <lu...@us...> - 2008-04-29 09:42:53
Revision: 177
          http://s3tools.svn.sourceforge.net/s3tools/?rev=177&view=rev
Author:   ludvigm
Date:     2008-04-29 02:42:38 -0700 (Tue, 29 Apr 2008)

Log Message:
-----------
2008-04-29  Michal Ludvig  <mi...@lo...>

    * S3/Exceptions.py: Exceptions moved out of S3.S3

Modified Paths:
--------------
    s3cmd/trunk/ChangeLog
    s3cmd/trunk/S3/S3.py
    s3cmd/trunk/s3cmd

Added Paths:
-----------
    s3cmd/trunk/S3/Exceptions.py

Modified: s3cmd/trunk/ChangeLog
===================================================================
--- s3cmd/trunk/ChangeLog	2008-04-29 09:40:52 UTC (rev 176)
+++ s3cmd/trunk/ChangeLog	2008-04-29 09:42:38 UTC (rev 177)
@@ -1,5 +1,6 @@
 2008-04-29  Michal Ludvig  <mi...@lo...>
 
+    * S3/Exceptions.py: Exceptions moved out of S3.S3
     * S3/SortedDict.py: rewritten from scratch to preserve
       case of keys while still sorting in case-ignore mode.

Added: s3cmd/trunk/S3/Exceptions.py
===================================================================
--- s3cmd/trunk/S3/Exceptions.py	                        (rev 0)
+++ s3cmd/trunk/S3/Exceptions.py	2008-04-29 09:42:38 UTC (rev 177)
@@ -0,0 +1,43 @@
+## Amazon S3 manager - Exceptions library
+## Author: Michal Ludvig <mi...@lo...>
+## http://www.logix.cz/michal
+## License: GPL Version 2
+
+from logging import debug, info, warning, error
+
+try:
+    import xml.etree.ElementTree as ET
+except ImportError:
+    import elementtree.ElementTree as ET
+
+class S3Error (Exception):
+    def __init__(self, response):
+        self.status = response["status"]
+        self.reason = response["reason"]
+        self.info = {}
+        debug("S3Error: %s (%s)" % (self.status, self.reason))
+        if response.has_key("headers"):
+            for header in response["headers"]:
+                debug("HttpHeader: %s: %s" % (header, response["headers"][header]))
+        if response.has_key("data"):
+            tree = ET.fromstring(response["data"])
+            for child in tree.getchildren():
+                if child.text != "":
+                    debug("ErrorXML: " + child.tag + ": " + repr(child.text))
+                    self.info[child.tag] = child.text
+
+    def __str__(self):
+        retval = "%d (%s)" % (self.status, self.reason)
+        try:
+            retval += (": %s" % self.info["Code"])
+        except (AttributeError, KeyError):
+            pass
+        return retval
+
+class S3UploadError(Exception):
+    pass
+
+class ParameterError(Exception):
+    pass

Modified: s3cmd/trunk/S3/S3.py
===================================================================
--- s3cmd/trunk/S3/S3.py	2008-04-29 09:40:52 UTC (rev 176)
+++ s3cmd/trunk/S3/S3.py	2008-04-29 09:42:38 UTC (rev 177)
@@ -19,37 +19,8 @@
 from SortedDict import SortedDict
 from BidirMap import BidirMap
 from Config import Config
+from Exceptions import *
 
-class S3Error (Exception):
-    def __init__(self, response):
-        self.status = response["status"]
-        self.reason = response["reason"]
-        self.info = {}
-        debug("S3Error: %s (%s)" % (self.status, self.reason))
-        if response.has_key("headers"):
-            for header in response["headers"]:
-                debug("HttpHeader: %s: %s" % (header, response["headers"][header]))
-        if response.has_key("data"):
-            tree = ET.fromstring(response["data"])
-            for child in tree.getchildren():
-                if child.text != "":
-                    debug("ErrorXML: " + child.tag + ": " + repr(child.text))
-                    self.info[child.tag] = child.text
-
-    def __str__(self):
-        retval = "%d (%s)" % (self.status, self.reason)
-        try:
-            retval += (": %s" % self.info["Code"])
-        except (AttributeError, KeyError):
-            pass
-        return retval
-
-class S3UploadError(Exception):
-    pass
-
-class ParameterError(Exception):
-    pass
-
 class S3(object):
     http_methods = BidirMap(
         GET = 0x01,

Modified: s3cmd/trunk/s3cmd
===================================================================
--- s3cmd/trunk/s3cmd	2008-04-29 09:40:52 UTC (rev 176)
+++ s3cmd/trunk/s3cmd	2008-04-29 09:42:38 UTC (rev 177)
@@ -24,8 +24,8 @@
 from S3.Config import Config
 from S3.S3Uri import *
 from S3 import Utils
+from S3.Exceptions import *
 
-
 def output(message):
     print message
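For context, the response body that S3Error picks apart is Amazon's standard error document. An illustrative example (made-up values, field names per the S3 REST API), parsed the same way the constructor does:

    import xml.etree.ElementTree as ET

    data = """<Error>
      <Code>NoSuchKey</Code>
      <Message>The specified key does not exist.</Message>
      <Resource>/mybucket/myfile</Resource>
      <RequestId>4442587FB7D0A2F9</RequestId>
    </Error>"""

    info = {}
    for child in ET.fromstring(data):
        info[child.tag] = child.text
    print info["Code"]      # NoSuchKey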
From: <lu...@us...> - 2008-04-29 09:43:56
Revision: 178
          http://s3tools.svn.sourceforge.net/s3tools/?rev=178&view=rev
Author:   ludvigm
Date:     2008-04-29 02:43:51 -0700 (Tue, 29 Apr 2008)

Log Message:
-----------
2008-04-29  Michal Ludvig  <mi...@lo...>

    * s3db, S3/SimpleDB.py: Initial support for Amazon SimpleDB.
      For now implements ListDomains() call and most of the
      infrastructure required for request creation.

Modified Paths:
--------------
    s3cmd/trunk/ChangeLog
    s3cmd/trunk/S3/Config.py

Added Paths:
-----------
    s3cmd/trunk/S3/SimpleDB.py
    s3cmd/trunk/s3db

Modified: s3cmd/trunk/ChangeLog
===================================================================
--- s3cmd/trunk/ChangeLog	2008-04-29 09:42:38 UTC (rev 177)
+++ s3cmd/trunk/ChangeLog	2008-04-29 09:43:51 UTC (rev 178)
@@ -1,5 +1,11 @@
 2008-04-29  Michal Ludvig  <mi...@lo...>
 
+    * s3db, S3/SimpleDB.py: Initial support for Amazon SimpleDB.
+      For now implements ListDomains() call and most of the
+      infrastructure required for request creation.
+
+2008-04-29  Michal Ludvig  <mi...@lo...>
+
     * S3/Exceptions.py: Exceptions moved out of S3.S3
     * S3/SortedDict.py: rewritten from scratch to preserve
       case of keys while still sorting in case-ignore mode.

Modified: s3cmd/trunk/S3/Config.py
===================================================================
--- s3cmd/trunk/S3/Config.py	2008-04-29 09:42:38 UTC (rev 177)
+++ s3cmd/trunk/S3/Config.py	2008-04-29 09:43:51 UTC (rev 178)
@@ -15,6 +15,7 @@
     secret_key = ""
     host_base = "s3.amazonaws.com"
     host_bucket = "%(bucket)s.s3.amazonaws.com"
+    simpledb_host = "sdb.amazonaws.com"
     verbosity = logging.WARNING
     send_chunk = 4096
     recv_chunk = 4096

Added: s3cmd/trunk/S3/SimpleDB.py
===================================================================
--- s3cmd/trunk/S3/SimpleDB.py	                        (rev 0)
+++ s3cmd/trunk/S3/SimpleDB.py	2008-04-29 09:43:51 UTC (rev 178)
@@ -0,0 +1,102 @@
+## Amazon SimpleDB library
+## Author: Michal Ludvig <mi...@lo...>
+## http://www.logix.cz/michal
+## License: GPL Version 2
+
+"""
+Low-level class for working with Amazon SimpleDB
+"""
+
+import time
+import urllib
+import base64
+import hmac
+import sha
+import httplib
+from logging import debug, info, warning, error
+
+from Utils import convertTupleListToDict
+from SortedDict import SortedDict
+from Exceptions import *
+
+class SimpleDB(object):
+    # API Version
+    # See http://docs.amazonwebservices.com/AmazonSimpleDB/2007-11-07/DeveloperGuide/
+    Version = "2007-11-07"
+    SignatureVersion = 1
+
+    def __init__(self, config):
+        self.config = config
+
+    def ListDomains(self, MaxNumberOfDomains = 100):
+        '''
+        Lists all domains associated with our Access Key. Returns
+        domain names up to the limit set by MaxNumberOfDomains.
+        '''
+        parameters = SortedDict()
+        parameters['MaxNumberOfDomains'] = MaxNumberOfDomains
+        response = self.send_request("ListDomains", domain = None, parameters = parameters)
+        return response
+
+    def send_request(self, *args, **kwargs):
+        request = self.create_request(*args, **kwargs)
+        debug("Request: %s" % repr(request))
+        conn = self.get_connection()
+        conn.request("GET", self.format_uri(request['uri_params']))
+        http_response = conn.getresponse()
+        response = {}
+        response["status"] = http_response.status
+        response["reason"] = http_response.reason
+        response["headers"] = convertTupleListToDict(http_response.getheaders())
+        response["data"] = http_response.read()
+        debug("Response: " + str(response))
+        conn.close()
+
+        if response["status"] < 200 or response["status"] > 299:
+            raise S3Error(response)
+
+        return response
+
+    def create_request(self, action, domain, parameters = None):
+        if not parameters:
+            parameters = SortedDict()
+        parameters['AWSAccessKeyId'] = self.config.access_key
+        parameters['Version'] = self.Version
+        parameters['SignatureVersion'] = self.SignatureVersion
+        parameters['Action'] = action
+        parameters['Timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+        if domain:
+            parameters['DomainName'] = domain
+        parameters['Signature'] = self.sign_request(parameters)
+        parameters.keys_return_lowercase = False
+        uri_params = urllib.urlencode(parameters)
+        request = {}
+        request['uri_params'] = uri_params
+        request['parameters'] = parameters
+        return request
+
+    def sign_request(self, parameters):
+        h = ""
+        parameters.keys_sort_lowercase = True
+        parameters.keys_return_lowercase = False
+        for key in parameters:
+            h += "%s%s" % (key, parameters[key])
+        debug("SignRequest: %s" % h)
+        return base64.encodestring(hmac.new(self.config.secret_key, h, sha).digest()).strip()
+
+    def get_connection(self):
+        if self.config.proxy_host != "":
+            return httplib.HTTPConnection(self.config.proxy_host, self.config.proxy_port)
+        else:
+            if self.config.use_https:
+                return httplib.HTTPSConnection(self.config.simpledb_host)
+            else:
+                return httplib.HTTPConnection(self.config.simpledb_host)
+
+    def format_uri(self, uri_params):
+        if self.config.proxy_host != "":
+            uri = "http://%s/?%s" % (self.config.simpledb_host, uri_params)
+        else:
+            uri = "/?%s" % uri_params
+        debug('format_uri(): ' + uri)
+        return uri

Added: s3cmd/trunk/s3db
===================================================================
--- s3cmd/trunk/s3db	                        (rev 0)
+++ s3cmd/trunk/s3db	2008-04-29 09:43:51 UTC (rev 178)
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+
+## Amazon S3 manager
+## Author: Michal Ludvig <mi...@lo...>
+## http://www.logix.cz/michal
+## License: GPL Version 2
+
+import sys
+import os
+import logging
+
+from optparse import OptionParser, Option, OptionValueError, IndentedHelpFormatter
+from logging import debug, info, warning, error
+
+## Our modules
+from S3 import PkgInfo
+from S3.SimpleDB import SimpleDB
+from S3.Config import Config
+from S3.Exceptions import *
+
+
+if __name__ == '__main__':
+    if float("%d.%d" %(sys.version_info[0], sys.version_info[1])) < 2.4:
+        sys.stderr.write("ERROR: Python 2.4 or higher required, sorry.\n")
+        sys.exit(1)
+    logging.root.setLevel(logging.DEBUG)
+    cfg = Config(os.getenv("HOME")+"/.s3cfg")
+    sdb = SimpleDB(cfg)
+    print sdb.ListDomains()

Property changes on: s3cmd/trunk/s3db
___________________________________________________________________
Name: svn:executable
   + *
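sign_request() implements AWS SignatureVersion 1: parameter names are sorted case-insensitively, each name is concatenated with its value (no separators), and the result is HMAC-SHA1-signed with the secret key and base64-encoded. A standalone equivalent, with hashlib standing in for the deprecated sha module:

    import base64, hmac, hashlib

    def sign_v1(parameters, secret_key):
        # Names sorted case-insensitively; values appended verbatim.
        to_sign = "".join("%s%s" % (k, parameters[k])
                          for k in sorted(parameters, key = str.lower))
        digest = hmac.new(secret_key, to_sign, hashlib.sha1).digest()
        return base64.b64encode(digest).strip()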
From: <lu...@us...> - 2008-04-30 14:10:48
Revision: 179
          http://s3tools.svn.sourceforge.net/s3tools/?rev=179&view=rev
Author:   ludvigm
Date:     2008-04-30 07:10:34 -0700 (Wed, 30 Apr 2008)

Log Message:
-----------
2008-04-30  Michal Ludvig  <mi...@lo...>

    * s3db, S3/SimpleDB.py: Implemented almost full SimpleDB API.

Modified Paths:
--------------
    s3cmd/trunk/ChangeLog
    s3cmd/trunk/S3/SimpleDB.py
    s3cmd/trunk/s3db

Modified: s3cmd/trunk/ChangeLog
===================================================================
--- s3cmd/trunk/ChangeLog	2008-04-29 09:43:51 UTC (rev 178)
+++ s3cmd/trunk/ChangeLog	2008-04-30 14:10:34 UTC (rev 179)
@@ -1,3 +1,7 @@
+2008-04-30  Michal Ludvig  <mi...@lo...>
+
+    * s3db, S3/SimpleDB.py: Implemented almost full SimpleDB API.
+
 2008-04-29  Michal Ludvig  <mi...@lo...>
 
     * s3db, S3/SimpleDB.py: Initial support for Amazon SimpleDB.

Modified: s3cmd/trunk/S3/SimpleDB.py
===================================================================
--- s3cmd/trunk/S3/SimpleDB.py	2008-04-29 09:43:51 UTC (rev 178)
+++ s3cmd/trunk/S3/SimpleDB.py	2008-04-30 14:10:34 UTC (rev 179)
@@ -28,6 +28,10 @@
     def __init__(self, config):
         self.config = config
 
+    ## ------------------------------------------------
+    ## Methods implementing SimpleDB API
+    ## ------------------------------------------------
+
     def ListDomains(self, MaxNumberOfDomains = 100):
         '''
         Lists all domains associated with our Access Key. Returns
@@ -35,12 +39,79 @@
         '''
         parameters = SortedDict()
         parameters['MaxNumberOfDomains'] = MaxNumberOfDomains
-        response = self.send_request("ListDomains", domain = None, parameters = parameters)
-        return response
-
+        return self.send_request("ListDomains", DomainName = None, parameters = parameters)
+
+    def CreateDomain(self, DomainName):
+        return self.send_request("CreateDomain", DomainName = DomainName)
+
+    def DeleteDomain(self, DomainName):
+        return self.send_request("DeleteDomain", DomainName = DomainName)
+
+    def PutAttributes(self, DomainName, ItemName, Attributes):
+        parameters = SortedDict()
+        parameters['ItemName'] = ItemName
+        seq = 0
+        for attrib in Attributes:
+            if type(Attributes[attrib]) == type(list()):
+                for value in Attributes[attrib]:
+                    parameters['Attribute.%d.Name' % seq] = attrib
+                    parameters['Attribute.%d.Value' % seq] = unicode(value)
+                    seq += 1
+            else:
+                parameters['Attribute.%d.Name' % seq] = attrib
+                parameters['Attribute.%d.Value' % seq] = unicode(Attributes[attrib])
+                seq += 1
+        ## TODO:
+        ## - support for Attribute.N.Replace
+        ## - support for multiple values for one attribute
+        return self.send_request("PutAttributes", DomainName = DomainName, parameters = parameters)
+
+    def GetAttributes(self, DomainName, ItemName, Attributes = []):
+        parameters = SortedDict()
+        parameters['ItemName'] = ItemName
+        seq = 0
+        for attrib in Attributes:
+            parameters['AttributeName.%d' % seq] = attrib
+            seq += 1
+        return self.send_request("GetAttributes", DomainName = DomainName, parameters = parameters)
+
+    def DeleteAttributes(self, DomainName, ItemName, Attributes = {}):
+        """
+        Remove specified Attributes from ItemName.
+        Attributes parameter can be either:
+        - not specified, in which case the whole Item is removed
+        - list, e.g. ['Attr1', 'Attr2'] in which case these parameters are removed
+        - dict, e.g. {'Attr' : 'One', 'Attr' : 'Two'} in which case the
+          specified values are removed from multi-value attributes.
+        """
+        parameters = SortedDict()
+        parameters['ItemName'] = ItemName
+        seq = 0
+        for attrib in Attributes:
+            parameters['Attribute.%d.Name' % seq] = attrib
+            if type(Attributes) == type(dict()):
+                parameters['Attribute.%d.Value' % seq] = unicode(Attributes[attrib])
+            seq += 1
+        return self.send_request("DeleteAttributes", DomainName = DomainName, parameters = parameters)
+
+    def Query(self, DomainName, QueryExpression = None, MaxNumberOfItems = None, NextToken = None):
+        parameters = SortedDict()
+        if QueryExpression:
+            parameters['QueryExpression'] = QueryExpression
+        if MaxNumberOfItems:
+            parameters['MaxNumberOfItems'] = MaxNumberOfItems
+        if NextToken:
+            parameters['NextToken'] = NextToken
+        return self.send_request("Query", DomainName = DomainName, parameters = parameters)
+        ## Handle NextToken? Or maybe not - let the upper level do it
+
+    ## ------------------------------------------------
+    ## Low-level methods for handling SimpleDB requests
+    ## ------------------------------------------------
+
     def send_request(self, *args, **kwargs):
         request = self.create_request(*args, **kwargs)
-        debug("Request: %s" % repr(request))
+        #debug("Request: %s" % repr(request))
         conn = self.get_connection()
         conn.request("GET", self.format_uri(request['uri_params']))
         http_response = conn.getresponse()
@@ -49,24 +120,24 @@
         response["reason"] = http_response.reason
         response["headers"] = convertTupleListToDict(http_response.getheaders())
         response["data"] = http_response.read()
-        debug("Response: " + str(response))
         conn.close()
 
         if response["status"] < 200 or response["status"] > 299:
+            debug("Response: " + str(response))
             raise S3Error(response)
 
         return response
 
-    def create_request(self, action, domain, parameters = None):
+    def create_request(self, Action, DomainName, parameters = None):
         if not parameters:
             parameters = SortedDict()
         parameters['AWSAccessKeyId'] = self.config.access_key
         parameters['Version'] = self.Version
         parameters['SignatureVersion'] = self.SignatureVersion
-        parameters['Action'] = action
+        parameters['Action'] = Action
         parameters['Timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
-        if domain:
-            parameters['DomainName'] = domain
+        if DomainName:
+            parameters['DomainName'] = DomainName
         parameters['Signature'] = self.sign_request(parameters)
         parameters.keys_return_lowercase = False
         uri_params = urllib.urlencode(parameters)
@@ -81,7 +152,7 @@
         parameters.keys_return_lowercase = False
         for key in parameters:
             h += "%s%s" % (key, parameters[key])
-        debug("SignRequest: %s" % h)
+        #debug("SignRequest: %s" % h)
         return base64.encodestring(hmac.new(self.config.secret_key, h, sha).digest()).strip()
 
     def get_connection(self):
@@ -98,5 +169,5 @@
             uri = "http://%s/?%s" % (self.config.simpledb_host, uri_params)
         else:
             uri = "/?%s" % uri_params
-        debug('format_uri(): ' + uri)
+        #debug('format_uri(): ' + uri)
         return uri

Modified: s3cmd/trunk/s3db
===================================================================
--- s3cmd/trunk/s3db	2008-04-29 09:43:51 UTC (rev 178)
+++ s3cmd/trunk/s3db	2008-04-30 14:10:34 UTC (rev 179)
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-
+# vim: set fileencoding=utf-8 :
 ## Amazon S3 manager
 ## Author: Michal Ludvig <mi...@lo...>
 ## http://www.logix.cz/michal
@@ -18,12 +18,36 @@
 from S3.Config import Config
 from S3.Exceptions import *
 
-
+def display_response(response):
+    print "%s\n%s\n%s" % ('-'*40, response['data'], '-'*40)
+
 if __name__ == '__main__':
     if float("%d.%d" %(sys.version_info[0], sys.version_info[1])) < 2.4:
         sys.stderr.write("ERROR: Python 2.4 or higher required, sorry.\n")
         sys.exit(1)
+    cfg = Config(os.getenv("HOME")+"/.s3cfg")
+
     logging.root.setLevel(logging.DEBUG)
-    cfg = Config(os.getenv("HOME")+"/.s3cfg")
     sdb = SimpleDB(cfg)
-    print sdb.ListDomains()
+
+    try:
+        display_response(sdb.ListDomains())
+
+        display_response(sdb.CreateDomain("logix.cz-test"))
+
+        display_response(sdb.ListDomains())
+
+        display_response(sdb.PutAttributes("logix.cz-test", "AbCd", {'First': "One", "Second" : 2, "Third" : u"drei"}))
+        display_response(sdb.PutAttributes("logix.cz-test", "XyZ", {'xyz' : ['x', 'y', 'z'], 'Third' : u'traja'}))
+
+        display_response(sdb.GetAttributes("logix.cz-test", "AbCd", ['Second', 'Third']))
+        display_response(sdb.GetAttributes("logix.cz-test", "XyZ"))
+
+        display_response(sdb.Query("logix.cz-test", "['xyz' = 'z']"))
+
+        display_response(sdb.DeleteDomain("logix.cz-test"))
+
+        display_response(sdb.ListDomains())
+    except S3Error, e:
+        error(e)
+        error(e.info)
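One detail worth noting: PutAttributes() flattens the Attributes dict into the numbered parameter scheme SimpleDB expects, repeating the name for multi-valued attributes. For example, the test call above with

    {'xyz' : ['x', 'y', 'z'], 'Third' : u'traja'}

produces request parameters along the lines of

    Attribute.0.Name=xyz    Attribute.0.Value=x
    Attribute.1.Name=xyz    Attribute.1.Value=y
    Attribute.2.Name=xyz    Attribute.2.Value=z
    Attribute.3.Name=Third  Attribute.3.Value=traja

(the numbering order is illustrative, since it follows Python 2's undefined dict iteration order).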
From: <lu...@us...> - 2008-06-03 13:40:18
Revision: 180
          http://s3tools.svn.sourceforge.net/s3tools/?rev=180&view=rev
Author:   ludvigm
Date:     2008-06-03 06:40:11 -0700 (Tue, 03 Jun 2008)

Log Message:
-----------
* s3cmd: Refactored cmd_sync() in preparation for remote->local sync.

Modified Paths:
--------------
    s3cmd/trunk/ChangeLog
    s3cmd/trunk/s3cmd

Modified: s3cmd/trunk/ChangeLog
===================================================================
--- s3cmd/trunk/ChangeLog	2008-04-30 14:10:34 UTC (rev 179)
+++ s3cmd/trunk/ChangeLog	2008-06-03 13:40:11 UTC (rev 180)
@@ -1,3 +1,8 @@
+2008-06-04  Michal Ludvig  <mi...@lo...>
+
+    * s3cmd: Refactored cmd_sync() in preparation
+      for remote->local sync.
+
 2008-04-30  Michal Ludvig  <mi...@lo...>
 
     * s3db, S3/SimpleDB.py: Implemented almost full SimpleDB API.

Modified: s3cmd/trunk/s3cmd
===================================================================
--- s3cmd/trunk/s3cmd	2008-04-30 14:10:34 UTC (rev 179)
+++ s3cmd/trunk/s3cmd	2008-06-03 13:40:11 UTC (rev 180)
@@ -298,52 +298,15 @@
     else:
         raise
 
-def cmd_sync(args):
-    def _build_attr_header(src):
-        attrs = {}
-        st = os.stat_result(os.stat(src))
-        for attr in cfg.preserve_attrs_list:
-            if attr == 'uname':
-                try:
-                    val = pwd.getpwuid(st.st_uid).pw_name
-                except KeyError:
-                    attr = "uid"
-                    val = st.st_uid
-                    warning("%s: Owner username not known. Storing UID=%d instead." % (src, val))
-            elif attr == 'gname':
-                try:
-                    val = grp.getgrgid(st.st_gid).gr_name
-                except KeyError:
-                    attr = "gid"
-                    val = st.st_gid
-                    warning("%s: Owner groupname not known. Storing GID=%d instead." % (src, val))
-            else:
-                val = getattr(st, 'st_' + attr)
-            attrs[attr] = val
-        result = ""
-        for k in attrs: result += "%s:%s/" % (k, attrs[k])
-        return { 'x-amz-meta-s3cmd-attrs' : result[:-1] }
-
-    src = args.pop(0)
-    if S3Uri(src).type != "file":
-        raise ParameterError("Source must be a local path instead of: %s" % src)
-    dst = args.pop(0)
-    if not dst.endswith('/'):
-        dst += "/"
-    dst_uri = S3Uri(dst)
-    if dst_uri.type != "s3":
-        raise ParameterError("Destination must be a S3 URI instead of: %s" % dst)
-    if (len(args)):
-        raise ParameterError("Too many parameters! Expected: %s" % commands['sync']['param'])
-
-    s3 = S3(Config())
-
+def _get_filelist_local(local_uri):
     output("Compiling list of local files...")
-    if os.path.isdir(src):
-        loc_base = os.path.join(src, "")
-        filelist = os.walk(src)
+    local_path = local_uri.path()
+    if os.path.isdir(local_path):
+        loc_base = os.path.join(local_path, "")
+        filelist = os.walk(local_path)
     else:
         loc_base = "./"
-        filelist = [( '.', [], [src] )]
+        filelist = [( '.', [], [local_path] )]
     loc_base_len = len(loc_base)
     loc_list = {}
     for root, dirs, files in filelist:
@@ -364,15 +327,17 @@
             'mtime' : sr.st_mtime,
             ## TODO: Possibly more to save here...
         }
-    loc_count = len(loc_list)
-
+    return loc_list
+
+def _get_filelist_remote(remote_uri):
     output("Retrieving list of remote files...")
-    response = s3.bucket_list(dst_uri.bucket(), prefix = dst_uri.object())
-
-    rem_base = dst_uri.object()
+    s3 = S3(Config())
+    response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object())
+
+    rem_base = remote_uri.object()
     rem_base_len = len(rem_base)
     rem_list = {}
-    rem_count = len(response['list'])
     for object in response['list']:
         key = object['Key'][rem_base_len:].encode('utf-8')
         rem_list[key] = {
@@ -381,33 +346,86 @@
             'md5' : object['ETag'][1:-1],
             'object_key' : object['Key'].encode('utf-8'),
         }
-    output("Found %d local files, %d remote files" % (loc_count, rem_count))
-
+    return rem_list
+
+def _compare_filelists(src_list, dst_list, src_is_local_and_dst_is_remote):
     output("Verifying checksums...")
-    for file in loc_list.keys():
+    exists_list = {}
+    for file in src_list.keys():
         debug("Checking %s ..." % file)
-        if rem_list.has_key(file):
+        if dst_list.has_key(file):
             debug("%s exists in remote list" % file)
             ## Check size first
-            if rem_list[file]['size'] == loc_list[file]['size']:
-                debug("%s same size: %s" % (file, rem_list[file]['size']))
+            if dst_list[file]['size'] == src_list[file]['size']:
+                debug("%s same size: %s" % (file, dst_list[file]['size']))
                 ## ... same size, check MD5
-                loc_md5 = Utils.hash_file_md5(loc_list[file]['full_name'])
-                if loc_md5 == rem_list[file]['md5']:
-                    debug("%s md5 matches: %s" % (file, rem_list[file]['md5']))
+                if src_is_local_and_dst_is_remote:
+                    src_md5 = Utils.hash_file_md5(src_list[file]['full_name'])
+                    dst_md5 = dst_list[file]['md5']
+                else:
+                    src_md5 = src_list[file]['md5']
+                    dst_md5 = Utils.hash_file_md5(dst_list[file]['full_name'])
+                if src_md5 == dst_md5:
+                    debug("%s md5 matches: %s" % (file, dst_md5))
                     ## Checksums are the same.
-                    ## Remove from local-list, all that is left there will be uploaded
-                    debug("%s removed from local list - upload not needed" % file)
-                    del(loc_list[file])
+                    ## Remove from source-list, all that is left there will be transferred
+                    debug("%s removed from source list - transfer not needed" % file)
+                    exists_list[file] = src_list[file]
+                    del(src_list[file])
                 else:
-                    debug("! %s md5 mismatch: local=%s remote=%s" % (file, loc_md5, rem_list[file]['md5']))
+                    debug("! %s md5 mismatch: src=%s dst=%s" % (file, src_md5, dst_md5))
             else:
-                debug("! %s size mismatch: local=%s remote=%s" % (file, loc_list[file]['size'], rem_list[file]['size']))
+                debug("! %s size mismatch: src=%s dst=%s" % (file, src_list[file]['size'], dst_list[file]['size']))
 
-            ## Remove from remote-list, all that is left there will be deleted
-            debug("%s removed from remote list" % file)
-            del(rem_list[file])
+            ## Remove from destination-list, all that is left there will be deleted
+            debug("%s removed from destination list" % file)
+            del(dst_list[file])
+    return src_list, dst_list, exists_list
 
+def cmd_sync_remote2local(src, dst):
+    raise NotImplementedError("Remote->Local sync is not yet implemented.")
+
+def cmd_sync_local2remote(src, dst):
+    def _build_attr_header(src):
+        attrs = {}
+        st = os.stat_result(os.stat(src))
+        for attr in cfg.preserve_attrs_list:
+            if attr == 'uname':
+                try:
+                    val = pwd.getpwuid(st.st_uid).pw_name
+                except KeyError:
+                    attr = "uid"
+                    val = st.st_uid
+                    warning("%s: Owner username not known. Storing UID=%d instead." % (src, val))
+            elif attr == 'gname':
+                try:
+                    val = grp.getgrgid(st.st_gid).gr_name
+                except KeyError:
+                    attr = "gid"
+                    val = st.st_gid
+                    warning("%s: Owner groupname not known. Storing GID=%d instead." % (src, val))
+            else:
+                val = getattr(st, 'st_' + attr)
+            attrs[attr] = val
+        result = ""
+        for k in attrs: result += "%s:%s/" % (k, attrs[k])
+        return { 'x-amz-meta-s3cmd-attrs' : result[:-1] }
+
+    s3 = S3(Config())
+
+    src_uri = S3Uri(src)
+    dst_uri = S3Uri(dst)
+
+    loc_list = _get_filelist_local(src_uri)
+    loc_count = len(loc_list)
+
+    rem_list = _get_filelist_remote(dst_uri)
+    rem_count = len(rem_list)
+
+    output("Found %d local files, %d remote files" % (loc_count, rem_count))
+
+    _compare_filelists(loc_list, rem_list, True)
+
     output("Summary: %d local files to upload, %d remote files to delete" % (len(loc_list), len(rem_list)))
     for file in rem_list:
         uri = S3Uri("s3://" + dst_uri.bucket()+"/"+rem_list[file]['object_key'])
@@ -449,6 +467,20 @@
     output("Done. Uploaded %d bytes in %0.1f seconds, %0.2f %sB/s" %
         (total_size, total_elapsed, speed_fmt[0], speed_fmt[1]))
 
+def cmd_sync(args):
+    src = args.pop(0)
+    dst = args.pop(0)
+    if (len(args)):
+        raise ParameterError("Too many parameters! Expected: %s" % commands['sync']['param'])
+
+    if not dst.endswith('/'):
+        dst += "/"
+
+    if S3Uri(src).type == "file" and S3Uri(dst).type == "s3":
+        return cmd_sync_local2remote(src, dst)
+    if S3Uri(src).type == "s3" and S3Uri(dst).type == "file":
+        return cmd_sync_remote2local(src, dst)
+
 def resolve_list(lst, args):
     retval = []
     for item in lst:
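Distilled, _compare_filelists() leaves three disjoint sets behind: whatever survives in src_list must be transferred, whatever survives in dst_list must be deleted, and exists_list holds the files already in sync. A compact sketch of the rule; md5_of is a hypothetical accessor hiding the local-hash versus stored-ETag asymmetry that the boolean flag selects:

    def compare(src, dst, md5_of):
        exists = {}
        for name in src.keys():
            if name in dst:
                if (src[name]['size'] == dst[name]['size']
                        and md5_of(src, name) == md5_of(dst, name)):
                    exists[name] = src.pop(name)
                # Matched or not, 'name' is present on both sides,
                # so it is never a candidate for deletion:
                del dst[name]
        return src, dst, exists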
From: <lu...@us...> - 2008-06-04 10:21:31
Revision: 181
          http://s3tools.svn.sourceforge.net/s3tools/?rev=181&view=rev
Author:   ludvigm
Date:     2008-06-04 03:21:29 -0700 (Wed, 04 Jun 2008)

Log Message:
-----------
Implemented S3->local sync

* s3cmd: Implemented cmd_sync_remote2local() for restoring backups
  from S3 to a local filesystem
* S3/S3.py: S3.object_get_uri() now requires a writable stream
  rather than a path name.
* S3/Utils.py: Added mkdir_with_parents()

Modified Paths:
--------------
    s3cmd/trunk/ChangeLog
    s3cmd/trunk/S3/Exceptions.py
    s3cmd/trunk/S3/S3.py
    s3cmd/trunk/S3/Utils.py
    s3cmd/trunk/s3cmd

Modified: s3cmd/trunk/ChangeLog
===================================================================
--- s3cmd/trunk/ChangeLog	2008-06-03 13:40:11 UTC (rev 180)
+++ s3cmd/trunk/ChangeLog	2008-06-04 10:21:29 UTC (rev 181)
@@ -1,5 +1,13 @@
 2008-06-04  Michal Ludvig  <mi...@lo...>
 
+    * s3cmd: Implemented cmd_sync_remote2local() for restoring
+      backup from S3 to a local filesystem
+    * S3/S3.py: S3.object_get_uri() now requires writable stream
+      and not a path name.
+    * S3/Utils.py: Added mkdir_with_parents()
+
+2008-06-04  Michal Ludvig  <mi...@lo...>
+
     * s3cmd: Refactored cmd_sync() in preparation
       for remote->local sync.

Modified: s3cmd/trunk/S3/Exceptions.py
===================================================================
--- s3cmd/trunk/S3/Exceptions.py	2008-06-03 13:40:11 UTC (rev 180)
+++ s3cmd/trunk/S3/Exceptions.py	2008-06-04 10:21:29 UTC (rev 181)
@@ -37,6 +37,9 @@
 class S3UploadError(Exception):
     pass
 
+class S3DownloadError(Exception):
+    pass
+
 class ParameterError(Exception):
     pass

Modified: s3cmd/trunk/S3/S3.py
===================================================================
--- s3cmd/trunk/S3/S3.py	2008-06-03 13:40:11 UTC (rev 180)
+++ s3cmd/trunk/S3/S3.py	2008-06-04 10:21:29 UTC (rev 181)
@@ -175,36 +175,25 @@
         response = self.send_file(request, file)
         return response
 
-    def object_get_file(self, bucket, object, filename):
-        try:
-            stream = open(filename, "wb")
-        except IOError, e:
-            raise ParameterError("%s: %s" % (filename, e.strerror))
-        return self.object_get_stream(bucket, object, stream)
-
-    def object_get_stream(self, bucket, object, stream):
-        request = self.create_request("OBJECT_GET", bucket = bucket, object = object)
+    def object_get_uri(self, uri, stream):
+        if uri.type != "s3":
+            raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
+        request = self.create_request("OBJECT_GET", bucket = uri.bucket(), object = uri.object())
         response = self.recv_file(request, stream)
         return response
-
+
     def object_delete(self, bucket, object):
         request = self.create_request("OBJECT_DELETE", bucket = bucket, object = object)
         response = self.send_request(request)
         return response
 
     def object_put_uri(self, filename, uri, extra_headers = None):
+        # TODO TODO
+        # Make it consistent with stream-oriented object_get_uri()
         if uri.type != "s3":
             raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
         return self.object_put(filename, uri.bucket(), uri.object(), extra_headers)
 
-    def object_get_uri(self, uri, filename):
-        if uri.type != "s3":
-            raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
-        if filename == "-":
-            return self.object_get_stream(uri.bucket(), uri.object(), sys.stdout)
-        else:
-            return self.object_get_file(uri.bucket(), uri.object(), filename)
-
     def object_delete_uri(self, uri):
         if uri.type != "s3":
             raise ValueError("Expected URI type 's3', got '%s'" % uri.type)

Modified: s3cmd/trunk/S3/Utils.py
===================================================================
--- s3cmd/trunk/S3/Utils.py	2008-06-03 13:40:11 UTC (rev 180)
+++ s3cmd/trunk/S3/Utils.py	2008-06-04 10:21:29 UTC (rev 181)
@@ -11,6 +11,8 @@
 import md5
 import errno
 
+from logging import debug, info, warning, error
+
 try:
     import xml.etree.ElementTree as ET
 except ImportError:
@@ -140,3 +142,29 @@
         h.update(f.read())
     f.close()
     return h.hexdigest()
+
+def mkdir_with_parents(dir_name, mode):
+    """
+    mkdir_with_parents(dst_dir, mode)
+
+    Create directory 'dir_name' with all parent directories
+
+    Returns True on success, False otherwise.
+    """
+    pathmembers = dir_name.split(os.sep)
+    tmp_stack = []
+    while pathmembers and not os.path.isdir(os.sep.join(pathmembers)):
+        tmp_stack.append(pathmembers.pop())
+    while tmp_stack:
+        pathmembers.append(tmp_stack.pop())
+        cur_dir = os.sep.join(pathmembers)
+        try:
+            debug("mkdir(%s)" % cur_dir)
+            os.mkdir(cur_dir)
+        except IOError, e:
+            error("%s: can not make directory: %s" % (cur_dir, e.strerror))
+            return False
+        except Exception, e:
+            error("%s: %s" % (cur_dir, e))
+            return False
+    return True

Modified: s3cmd/trunk/s3cmd
===================================================================
--- s3cmd/trunk/s3cmd	2008-06-03 13:40:11 UTC (rev 180)
+++ s3cmd/trunk/s3cmd	2008-06-04 10:21:29 UTC (rev 181)
@@ -243,10 +243,19 @@
     else:
         # By default the destination filename is the object name
         destination = uri.object()
-
-    if not Config().force and os.path.exists(destination):
-        raise ParameterError("File %s already exists. Use --force to overwrite it" % destination)
-    response = s3.object_get_uri(uri, destination)
+    if destination == "-":
+        ## stdout
+        dst_stream = sys.stdout
+    else:
+        ## File
+        if not Config().force and os.path.exists(destination):
+            raise ParameterError("File %s already exists. Use --force to overwrite it" % destination)
+        try:
+            dst_stream = open(destination, "wb")
+        except IOError, e:
+            error("Skipping %s: %s" % (destination, e.strerror))
+            continue
+    response = s3.object_get_uri(uri, dst_stream)
     if response["headers"].has_key("x-amz-meta-s3tools-gpgenc"):
         gpg_decrypt(destination, response["headers"]["x-amz-meta-s3tools-gpgenc"])
         response["size"] = os.stat(destination)[6]
@@ -383,8 +392,107 @@
     return src_list, dst_list, exists_list
 
 def cmd_sync_remote2local(src, dst):
-    raise NotImplementedError("Remote->Local sync is not yet implemented.")
+    def _parse_attrs_header(attrs_header):
+        attrs = {}
+        for attr in attrs_header.split("/"):
+            key, val = attr.split(":")
+            attrs[key] = val
+        return attrs
+
+    s3 = S3(Config())
+
+    src_uri = S3Uri(src)
+    dst_uri = S3Uri(dst)
+
+    rem_list = _get_filelist_remote(src_uri)
+    rem_count = len(rem_list)
+
+    loc_list = _get_filelist_local(dst_uri)
+    loc_count = len(loc_list)
+
+    output("Found %d remote files, %d local files" % (rem_count, loc_count))
+
+    _compare_filelists(rem_list, loc_list, False)
+
+    output("Summary: %d remote files to download, %d local files to delete" % (len(rem_list), len(loc_list)))
+
+    for file in loc_list:
+        if cfg.delete_removed:
+            # os.unlink(file)
+            output("deleted '%s'" % file)
+        else:
+            output("not-deleted '%s'" % file)
+
+    total_size = 0
+    total_count = len(rem_list)
+    total_elapsed = 0.0
+    timestamp_start = time.time()
+    seq = 0
+    dir_cache = {}
+    src_base = src_uri.uri()
+    dst_base = dst_uri.path()
+    if not src_base[-1] == "/": src_base += "/"
+    file_list = rem_list.keys()
+    file_list.sort()
+    for file in file_list:
+        seq += 1
+        uri = S3Uri(src_base + file)
+        dst_file = dst_base + file
+        try:
+            dst_dir = os.path.dirname(dst_file)
+            if not dir_cache.has_key(dst_dir):
+                dir_cache[dst_dir] = Utils.mkdir_with_parents(dst_dir, mode = 022)
+            if dir_cache[dst_dir] == False:
+                warning("%s: destination directory not writable: %s" % (file, dst_dir))
+                continue
+            try:
+                open_flags = os.O_CREAT
+                if cfg.force:
+                    open_flags |= os.O_TRUNC
+                else:
+                    open_flags |= os.O_EXCL
+
+                debug("dst_file=%s" % dst_file)
+                # This will have failed should the file exist
+                os.open(dst_file, open_flags)
+                # Yeah I know there is a race condition here. Sadly I don't know how to open() in exclusive mode.
+                dst_stream = open(dst_file, "wb")
+                response = s3.object_get_uri(uri, dst_stream)
+                dst_stream.close()
+                if response['headers'].has_key('x-amz-meta-s3cmd-attrs') and cfg.preserve_attrs:
+                    attrs = _parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
+                    if attrs.has_key('mode'):
+                        os.chmod(dst_file, int(attrs['mode']))
+                    ## FIXME: uid/gid and mtime/ctime handling comes here! TODO
+            except OSError, e:
+                if e.errno == errno.EEXIST:
+                    warning("%s exists - not overwriting" % (dst_file))
+                    continue
+                raise
+            except IOError, e:
+                ## See if it's missing path and try again
+                error("%s: %s" % (file, e))
+                continue
+            finally:
+                ## Close the file if still open. Don't care if not.
+                try:
+                    dst_stream.close()
+                except:
+                    pass
+        except S3DownloadError, e:
+            error("%s: download failed too many times. Skipping that file." % file)
+            continue
+        speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True)
+        output("File '%s' stored as %s (%d bytes in %0.1f seconds, %0.2f %sB/s) [%d of %d]" %
+            (uri, dst_file, response["size"], response["elapsed"], speed_fmt[0], speed_fmt[1],
            seq, total_count))
+        total_size += response["size"]
+
+    total_elapsed = time.time() - timestamp_start
+    speed_fmt = formatSize(total_size/total_elapsed, human_readable = True, floating_point = True)
+    output("Done. Downloaded %d bytes in %0.1f seconds, %0.2f %sB/s" %
+        (total_size, total_elapsed, speed_fmt[0], speed_fmt[1]))
+
 def cmd_sync_local2remote(src, dst):
     def _build_attr_header(src):
         attrs = {}
@@ -427,6 +535,7 @@
     _compare_filelists(loc_list, rem_list, True)
 
     output("Summary: %d local files to upload, %d remote files to delete" % (len(loc_list), len(rem_list)))
+
     for file in rem_list:
         uri = S3Uri("s3://" + dst_uri.bucket()+"/"+rem_list[file]['object_key'])
         if cfg.delete_removed:
@@ -454,7 +563,7 @@
         try:
             response = s3.object_put_uri(src, uri, attr_header)
         except S3UploadError, e:
-            error("Upload of '%s' failed too many times. Skipping that file." % src)
+            error("%s: upload failed too many times. Skipping that file." % src)
             continue
         speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True)
         output("File '%s' stored as %s (%d bytes in %0.1f seconds, %0.2f %sB/s) [%d of %d]" %
@@ -473,6 +582,9 @@
     if (len(args)):
         raise ParameterError("Too many parameters! Expected: %s" % commands['sync']['param'])
 
+    if S3Uri(src).type == "s3" and not src.endswith('/'):
+        src += "/"
+
     if not dst.endswith('/'):
        dst += "/"
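About the race the code admits to (an os.open() probe with O_EXCL followed by a separate, non-exclusive open() of the same path): the exclusive-create descriptor can itself be wrapped into a file object with os.fdopen(), which removes the window entirely. A sketch under the same --force semantics:

    import os

    def open_exclusive(path, force = False):
        flags = os.O_WRONLY | os.O_CREAT
        # --force truncates an existing file; otherwise O_EXCL makes
        # creation atomic and raises OSError(EEXIST) if it exists.
        flags |= force and os.O_TRUNC or os.O_EXCL
        return os.fdopen(os.open(path, flags), "wb")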
From: <lu...@us...> - 2008-06-04 12:11:07
Revision: 182 http://s3tools.svn.sourceforge.net/s3tools/?rev=182&view=rev Author: ludvigm Date: 2008-06-04 05:11:04 -0700 (Wed, 04 Jun 2008) Log Message: ----------- * S3/Config.py: Store more file attributes in sync to S3. * s3cmd: Make sync remote2local more error-resilient. Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/S3/Config.py s3cmd/trunk/S3/Utils.py s3cmd/trunk/s3cmd Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-04 10:21:29 UTC (rev 181) +++ s3cmd/trunk/ChangeLog 2008-06-04 12:11:04 UTC (rev 182) @@ -1,3 +1,8 @@ +2008-06-05 Michal Ludvig <mi...@lo...> + + * S3/Config.py: Store more file attributes in sync to S3. + * s3cmd: Make sync remote2local more error-resilient. + 2008-06-04 Michal Ludvig <mi...@lo...> * s3cmd: Implemented cmd_sync_remote2local() for restoring Modified: s3cmd/trunk/S3/Config.py =================================================================== --- s3cmd/trunk/S3/Config.py 2008-06-04 10:21:29 UTC (rev 181) +++ s3cmd/trunk/S3/Config.py 2008-06-04 12:11:04 UTC (rev 182) @@ -29,9 +29,10 @@ preserve_attrs = True preserve_attrs_list = [ 'uname', # Verbose owner Name (e.g. 'root') - #'uid', # Numeric user ID (e.g. 0) + 'uid', # Numeric user ID (e.g. 0) 'gname', # Group name (e.g. 'users') - #'gid', # Numeric group ID (e.g. 100) + 'gid', # Numeric group ID (e.g. 100) + 'atime', # Last access timestamp 'mtime', # Modification timestamp 'ctime', # Creation timestamp 'mode', # File mode (e.g. rwxr-xr-x = 755) Modified: s3cmd/trunk/S3/Utils.py =================================================================== --- s3cmd/trunk/S3/Utils.py 2008-06-04 10:21:29 UTC (rev 181) +++ s3cmd/trunk/S3/Utils.py 2008-06-04 12:11:04 UTC (rev 182) @@ -143,9 +143,9 @@ f.close() return h.hexdigest() -def mkdir_with_parents(dir_name, mode): +def mkdir_with_parents(dir_name): """ - mkdir_with_parents(dst_dir, mode) + mkdir_with_parents(dst_dir) Create directory 'dir_name' with all parent directories @@ -161,10 +161,10 @@ try: debug("mkdir(%s)" % cur_dir) os.mkdir(cur_dir) - except IOError, e: - error("%s: can not make directory: %s" % (cur_dir, e.strerror)) + except (OSError, IOError), e: + warning("%s: can not make directory: %s" % (cur_dir, e.strerror)) return False except Exception, e: - error("%s: %s" % (cur_dir, e)) + warning("%s: %s" % (cur_dir, e)) return False return True Modified: s3cmd/trunk/s3cmd =================================================================== --- s3cmd/trunk/s3cmd 2008-06-04 10:21:29 UTC (rev 181) +++ s3cmd/trunk/s3cmd 2008-06-04 12:11:04 UTC (rev 182) @@ -441,7 +441,7 @@ try: dst_dir = os.path.dirname(dst_file) if not dir_cache.has_key(dst_dir): - dir_cache[dst_dir] = Utils.mkdir_with_parents(dst_dir, mode = 022) + dir_cache[dst_dir] = Utils.mkdir_with_parents(dst_dir) if dir_cache[dst_dir] == False: warning("%s: destination directory not writable: %s" % (file, dst_dir)) continue @@ -463,14 +463,20 @@ attrs = _parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs']) if attrs.has_key('mode'): os.chmod(dst_file, int(attrs['mode'])) - ## FIXME: uid/gid and mtime/ctime handling comes here! TODO + if attrs.has_key('mtime') or attrs.has_key('atime'): + mtime = attrs.has_key('mtime') and int(attrs['mtime']) or int(time.time()) + atime = attrs.has_key('atime') and int(attrs['atime']) or int(time.time()) + os.utime(dst_file, (atime, mtime)) + ## FIXME: uid/gid / uname/gname handling comes here! 
TODO except OSError, e: if e.errno == errno.EEXIST: warning("%s exists - not overwriting" % (dst_file)) continue + if e.errno in (errno.EPERM, errno.EACCES): + warning("%s not writable: %s" % (dst_file, e.strerror)) + continue raise - except IOError, e: - ## See if it's missing path and try again + except Exception, e: error("%s: %s" % (file, e)) continue finally: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
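The restore logic above relies on the 'x-amz-meta-s3cmd-attrs' header written at upload time: '/'-separated 'key:value' pairs. A self-contained sketch of the round trip under that format (function names and the sample values are illustrative only, not s3cmd API):

    import os, time

    def build_attr_header(attrs):
        ## {'mode': '420', 'mtime': '1212575000'} ->
        ## 'mode:420/mtime:1212575000'
        return "/".join(["%s:%s" % (k, attrs[k]) for k in attrs.keys()])

    def parse_attr_header(value):
        attrs = {}
        for attr in value.split("/"):
            key, val = attr.split(":")
            attrs[key] = val
        return attrs

    dst_file = "restored.bin"
    open(dst_file, "wb").close()        # stand-in for the downloaded file

    ## restore what the header carries; fall back to "now" for
    ## timestamps that were not stored
    attrs = parse_attr_header("mode:420/mtime:1212575000")
    if attrs.has_key('mode'):
        os.chmod(dst_file, int(attrs['mode']))   # 420 == 0644
    mtime = attrs.has_key('mtime') and int(attrs['mtime']) or int(time.time())
    atime = attrs.has_key('atime') and int(attrs['atime']) or int(time.time())
    os.utime(dst_file, (atime, mtime))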
From: <lu...@us...> - 2008-06-04 12:30:47
Revision: 183 http://s3tools.svn.sourceforge.net/s3tools/?rev=183&view=rev Author: ludvigm Date: 2008-06-04 05:30:44 -0700 (Wed, 04 Jun 2008) Log Message: ----------- * S3/PkgInfo.py: Bumped up version to 0.9.7 * NEWS: Added 0.9.7 * TODO: Removed completed tasks * s3cmd, s3cmd.1: Updated help texts, removed --dry-run option as it's not implemented. Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/NEWS s3cmd/trunk/S3/PkgInfo.py s3cmd/trunk/TODO s3cmd/trunk/s3cmd s3cmd/trunk/s3cmd.1 Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-04 12:11:04 UTC (rev 182) +++ s3cmd/trunk/ChangeLog 2008-06-04 12:30:44 UTC (rev 183) @@ -1,5 +1,13 @@ 2008-06-05 Michal Ludvig <mi...@lo...> + * S3/PkgInfo.py: Bumped up version to 0.9.7 + * NEWS: Added 0.9.7 + * TODO: Removed completed tasks + * s3cmd, s3cmd.1: Updated help texts, + removed --dry-run option as it's not implemented. + +2008-06-05 Michal Ludvig <mi...@lo...> + * S3/Config.py: Store more file attributes in sync to S3. * s3cmd: Make sync remote2local more error-resilient. Modified: s3cmd/trunk/NEWS =================================================================== --- s3cmd/trunk/NEWS 2008-06-04 12:11:04 UTC (rev 182) +++ s3cmd/trunk/NEWS 2008-06-04 12:30:44 UTC (rev 183) @@ -1,3 +1,12 @@ +s3cmd 0.9.7 - 2008-06-05 +=========== +* Implemented 'sync' from S3 back to local folder, including + file attribute restoration. +* Failed uploads are retried on lower speed to improve error + resilience. +* Compare MD5 of the uploaded file, compare with checksum + reported by S3 and re-upload on mismatch. + s3cmd 0.9.6 - 2008-02-28 =========== * Support for setting / guessing MIME-type of uploaded file Modified: s3cmd/trunk/S3/PkgInfo.py =================================================================== --- s3cmd/trunk/S3/PkgInfo.py 2008-06-04 12:11:04 UTC (rev 182) +++ s3cmd/trunk/S3/PkgInfo.py 2008-06-04 12:30:44 UTC (rev 183) @@ -1,5 +1,5 @@ package = "s3cmd" -version = "0.9.7-pre1" +version = "0.9.7" url = "http://s3tools.logix.cz" license = "GPL version 2" short_description = "S3cmd is a tool for managing Amazon S3 storage space." Modified: s3cmd/trunk/TODO =================================================================== --- s3cmd/trunk/TODO 2008-06-04 12:11:04 UTC (rev 182) +++ s3cmd/trunk/TODO 2008-06-04 12:30:44 UTC (rev 183) @@ -7,10 +7,6 @@ with "directories" - Recursion for some commands -- Implement 'sync' from S3 to local filesystem - - Will have to restore file attributes - - Eh, we have to store them first ;-) - - Implement GPG for sync (it's not that easy since it won't be easy to compare the encrypted-remote-object size with local file. @@ -20,12 +16,5 @@ have to do large number for object/HEAD requests. tough call). -- Compute MD5 checksum when sending file to S3 - (that's easy and patch exists but it will slow down - the operation. instead I'm planning to run the - MD5-summor in a separate thread since both MD5-summing - and sending data to S3 can both be time consuming yet - parallel tasks). 
- - Keep man page up to date and write some more documentation - Yeah, right ;-) Modified: s3cmd/trunk/s3cmd =================================================================== --- s3cmd/trunk/s3cmd 2008-06-04 12:11:04 UTC (rev 182) +++ s3cmd/trunk/s3cmd 2008-06-04 12:30:44 UTC (rev 183) @@ -781,7 +781,7 @@ {"cmd":"get", "label":"Get file from bucket", "param":"s3://BUCKET/OBJECT LOCAL_FILE", "func":cmd_object_get, "argc":1}, {"cmd":"del", "label":"Delete file from bucket", "param":"s3://BUCKET/OBJECT", "func":cmd_object_del, "argc":1}, #{"cmd":"mkdir", "label":"Make a virtual S3 directory", "param":"s3://BUCKET/path/to/dir", "func":cmd_mkdir, "argc":1}, - {"cmd":"sync", "label":"Synchronize a directory tree to S3", "param":"LOCAL_DIR s3://BUCKET[/PREFIX]", "func":cmd_sync, "argc":2}, + {"cmd":"sync", "label":"Synchronize a directory tree to S3", "param":"LOCAL_DIR s3://BUCKET[/PREFIX] or s3://BUCKET[/PREFIX] LOCAL_DIR", "func":cmd_sync, "argc":2}, {"cmd":"du", "label":"Disk usage by buckets", "param":"[s3://BUCKET[/PREFIX]]", "func":cmd_du, "argc":0}, {"cmd":"info", "label":"Get various information about Buckets or Objects", "param":"s3://BUCKET[/OBJECT]", "func":cmd_info, "argc":1}, #{"cmd":"setacl", "label":"Modify Access control list for Bucket or Object", "param":"s3://BUCKET[/OBJECT]", "func":cmd_setacl, "argc":1}, @@ -831,7 +831,7 @@ optparser.add_option("-c", "--config", dest="config", metavar="FILE", help="Config file name. Defaults to %default") optparser.add_option( "--dump-config", dest="dump_config", action="store_true", help="Dump current configuration after parsing config files and command line options and exit.") - optparser.add_option("-n", "--dry-run", dest="dry_run", action="store_true", help="Only show what should be uploaded or downloaded but don't actually do it. May still perform S3 requests to get bucket listings and other information though.") + #optparser.add_option("-n", "--dry-run", dest="dry_run", action="store_true", help="Only show what should be uploaded or downloaded but don't actually do it. May still perform S3 requests to get bucket listings and other information though.") optparser.add_option("-e", "--encrypt", dest="encrypt", action="store_true", help="Encrypt files before uploading to S3.") optparser.add_option( "--no-encrypt", dest="encrypt", action="store_false", help="Don't encrypt files.") Modified: s3cmd/trunk/s3cmd.1 =================================================================== --- s3cmd/trunk/s3cmd.1 2008-06-04 12:11:04 UTC (rev 182) +++ s3cmd/trunk/s3cmd.1 2008-06-04 12:30:44 UTC (rev 183) @@ -37,8 +37,11 @@ Delete file from bucket .TP \fBsync\fR \fILOCAL_DIR s3://BUCKET[/PREFIX]\fR -Synchronize a directory tree to S3 +Backup a directory tree to S3 .TP +\fBsync\fR \fIs3://BUCKET[/PREFIX] LOCAL_DIR\fR +Restore a tree from S3 to local directory +.TP \fBinfo\fR \fIs3://BUCKET[/OBJECT]\fR Get various information about a Bucket or Object .TP @@ -72,7 +75,7 @@ Options specific to \fBsync\fR command: .TP \fB\-\-delete\-removed\fR -Delete remote objects with no corresponding local file +Delete remote objects with no corresponding local file when \fIsync\fRing \fBto\fR S3 or delete local files with no corresponding object in S3 when \fIsync\fRing \fBfrom\fR S3. .TP \fB\-\-no\-delete\-removed\fR Don't delete remote objects. Default for 'sync' command. @@ -82,9 +85,9 @@ .TP \fB\-\-no\-preserve\fR Don't store filesystem attributes with uploaded files. 
-.TP -\fB\-n\fR, \fB\-\-dry\-run\fR -Only show what would be uploaded or downloaded but don't actually do it. May still perform S3 requests to get bucket listings and other information though. +.\".TP +.\"\fB\-n\fR, \fB\-\-dry\-run\fR +.\"Only show what would be uploaded or downloaded but don't actually do it. May still perform S3 requests to get bucket listings and other information though. .PP Options common for all commands (where it makes sense indeed): .TP This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lu...@us...> - 2008-06-09 12:53:52
Revision: 186 http://s3tools.svn.sourceforge.net/s3tools/?rev=186&view=rev Author: ludvigm Date: 2008-06-09 05:53:48 -0700 (Mon, 09 Jun 2008) Log Message: ----------- * s3cmd: Added --exclude switch for sync. Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/s3cmd Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-04 14:49:30 UTC (rev 185) +++ s3cmd/trunk/ChangeLog 2008-06-09 12:53:48 UTC (rev 186) @@ -1,3 +1,7 @@ +2008-06-10 Michal Ludvig <mi...@lo...> + + * s3cmd: Added --exclude switch for sync. + 2008-06-05 Michal Ludvig <mi...@lo...> * Released version 0.9.7 Modified: s3cmd/trunk/s3cmd =================================================================== --- s3cmd/trunk/s3cmd 2008-06-04 14:49:30 UTC (rev 185) +++ s3cmd/trunk/s3cmd 2008-06-09 12:53:48 UTC (rev 186) @@ -360,8 +360,23 @@ def _compare_filelists(src_list, dst_list, src_is_local_and_dst_is_remote): output("Verifying checksums...") exists_list = {} + exclude_list = {} for file in src_list.keys(): debug("Checking %s ..." % file) + excluded = False + for r in Config().exclude: + ## all paths start with '/' from the base dir + if r.search(os.sep + file): + ## Can't directly 'continue' to the outer loop + ## therefore this awkward excluded switch :-( + excluded = True + break + if excluded: + info("%s: excluded" % file) + exclude_list = src_list[file] + del(src_list[file]) + continue + if dst_list.has_key(file): debug("%s exists in remote list" % file) ## Check size first @@ -389,7 +404,7 @@ ## Remove from destination-list, all that is left there will be deleted debug("%s removed from destination list" % file) del(dst_list[file]) - return src_list, dst_list, exists_list + return src_list, dst_list, exists_list, exclude_list def cmd_sync_remote2local(src, dst): def _parse_attrs_header(attrs_header): @@ -842,6 +857,9 @@ optparser.add_option( "--no-delete-removed", dest="delete_removed", action="store_false", help="Don't delete remote objects.") optparser.add_option("-p", "--preserve", dest="preserve_attrs", action="store_true", help="Preserve filesystem attributes (mode, ownership, timestamps). Default for [sync] command.") optparser.add_option( "--no-preserve", dest="preserve_attrs", action="store_false", help="Don't store FS attributes") + optparser.add_option( "--exclude", dest="exclude", action="append", metavar="REGEXP", help="Filenames and paths matching REGEXP will be excluded from sync") + #optparser.add_option( "--exclude-from", dest="exclude_from", action="append", metavar="FILE", help="Read --exclude REGEXPs from FILE") + optparser.add_option( "--bucket-location", dest="bucket_location", help="Datacentre to create bucket in. Either EU or US (default)") optparser.add_option("-m", "--mime-type", dest="default_mime_type", type="mimetype", metavar="MIME/TYPE", help="Default MIME-type to be set for objects stored.") @@ -899,6 +917,9 @@ ## Some Config() options are not settable from command line pass + for ex in options.exclude: + cfg.exclude.append(re.compile(ex)) + if cfg.encrypt and cfg.gpg_passphrase == "": error("Encryption requested but no passphrase set in config file.") error("Please re-run 's3cmd --configure' and supply it.") This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
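The new exclude test prefixes each relative name with os.sep before matching, so every rule sees paths like '/dir123/file2.bin' no matter what prefix was given on the command line. A standalone sketch of the matching loop (rule list and file names invented for illustration):

    import re, os

    exclude = [ re.compile(r'\.bin$'), re.compile(r'/\.') ]

    def is_excluded(rel_name):
        name = os.sep + rel_name     # rules always see a leading '/'
        for r in exclude:
            if r.search(name):
                return True
        return False

    print is_excluded("dir123/file2.bin")   # True  (r'\.bin$' matches)
    print is_excluded("dir123/.hidden")     # True  (r'/\.'   matches)
    print is_excluded("dir123/file1.ext")   # False (kept for transfer)

One thing to watch in the diff itself: 'exclude_list = src_list[file]' looks like it was meant to be 'exclude_list[file] = src_list[file]'; as committed it replaces the accumulating dict with a single entry.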
From: <lu...@us...> - 2008-06-09 14:17:26
Revision: 188 http://s3tools.svn.sourceforge.net/s3tools/?rev=188&view=rev Author: ludvigm Date: 2008-06-09 07:17:23 -0700 (Mon, 09 Jun 2008) Log Message: ----------- * s3cmd.1, NEWS: Document --exclude Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/NEWS s3cmd/trunk/TODO s3cmd/trunk/s3cmd.1 Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-09 14:11:14 UTC (rev 187) +++ s3cmd/trunk/ChangeLog 2008-06-09 14:17:23 UTC (rev 188) @@ -1,6 +1,7 @@ 2008-06-10 Michal Ludvig <mi...@lo...> * s3cmd: Added --exclude switch for sync. + * s3cmd.1, NEWS: Document --exclude 2008-06-05 Michal Ludvig <mi...@lo...> Modified: s3cmd/trunk/NEWS =================================================================== --- s3cmd/trunk/NEWS 2008-06-09 14:11:14 UTC (rev 187) +++ s3cmd/trunk/NEWS 2008-06-09 14:17:23 UTC (rev 188) @@ -1,3 +1,7 @@ +s3cmd 0.9.8 - ???? +=========== +* Added --exclude option for sync command. + s3cmd 0.9.7 - 2008-06-05 =========== * Implemented 'sync' from S3 back to local folder, including Modified: s3cmd/trunk/TODO =================================================================== --- s3cmd/trunk/TODO 2008-06-09 14:11:14 UTC (rev 187) +++ s3cmd/trunk/TODO 2008-06-09 14:17:23 UTC (rev 188) @@ -1,6 +1,10 @@ TODO list for s3cmd project =========================== +- Finish sync --exclude support. + - Add --exclude-from and --debug-exclude implementations. + - Support shell-style wildcards? Or rsync-style excludes? + - Treat objects with "/" in their name as directories - Will need local cache for bucket listings - More user friendly 'del' operation that would work Modified: s3cmd/trunk/s3cmd.1 =================================================================== --- s3cmd/trunk/s3cmd.1 2008-06-09 14:11:14 UTC (rev 187) +++ s3cmd/trunk/s3cmd.1 2008-06-09 14:17:23 UTC (rev 188) @@ -85,6 +85,15 @@ .TP \fB\-\-no\-preserve\fR Don't store filesystem attributes with uploaded files. +.TP +\fB\-\-exclude REGEXP\fR +Exclude files matching REGEXP from \fIsync\fI. See SYNC COMMAND section for more information. +.TP +\fB\-\-exclude\-from FILE\fR +Same as \-\-exclude but reads REGEXPs from the given FILE instead of expecting them on the command line. +.TP +\fB\-\-debug\-exclude\fR +Display detailed information about matching file names against exclude\-rules. .\".TP .\"\fB\-n\fR, \fB\-\-dry\-run\fR .\"Only show what would be uploaded or downloaded but don't actually do it. May still perform S3 requests to get bucket listings and other information though. @@ -134,6 +143,56 @@ .B s3cmd version and exit. +.SH SYNC COMMAND +One of the most powerful commands of \fIs3cmd\fR is \fBs3cmd sync\fR used for +synchronising complete directory trees to or from remote S3 storage. +.PP +Basic usage common in backup scenarios is as simple as: +.nf + s3cmd sync /local/path s3://test-bucket/backup +.fi +.PP +This command will find all files under /local/path directory and copy them +to corresponding paths under s3://test-bucket/backup on the remote side. 
+For example: +.nf +/local/path\fB/file1.ext\fR -> s3://test-bucket/backup\fB/file1.ext\fR +/local/path\fB/dir123/file2.bin\fR -> s3://test-bucket/backup\fB/dir123/file2.bin\fR +.fi + +To retrieve the files back from S3 use inverted syntax: +.nf + s3cmd sync s3://test-bucket/backup/ /tmp/restore +.fi +that will download files: +.nf +s3://test-bucket/backup\fB/file1.ext\fR -> /tmp/restore\fB/file1.ext\fR +s3://test-bucket/backup\fB/dir123/file2.bin\fR -> /tmp/restore\fB/dir123/file2.bin\fR +.fi + +For the purpose of \fB\-\-exclude\fR and \fB\-\-exclude\-from\fR matching the file name +\fIalways\fR begins with \fB/\fR (slash) and has the local or remote common part removed. +For instance in the previous example the file names tested against --exclude list +will be \fB/\fRfile1.ext and \fB/\fRdir123/file2.bin, that is both with the leading +slash regardless whether you specified s3://test-bucket/backup or +s3://test-bucket/backup/ (note the trailing slash) on the command line. + +Both \fB\-\-exclude\fR and \fB\-\-exclude\-from\fR options expect regular expressions, not +shell-style wildcards! Run s3cmd with \fB\-\-debug\-exclude\fR to get a detailed list of +matching file names against exclude rules. + +For example to exclude all files with ".bin" extension use: +.PP + \-\-exclude '\.bin$' +.PP +to exclude all hidden files and subdirectories (i.e. those whose name begins with dot ".") use: +.PP + \-\-exclude '/\.' +.PP +on the other hand to exclude only hidden files but not hidden subdirectories use: +.PP + \-\-exclude '/\.[^/]*$' + .SH AUTHOR Written by Michal Ludvig <mi...@lo...> .SH REPORTING BUGS This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
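The example patterns at the end of the new SYNC COMMAND section are easy to verify; a quick check against names in the form the matcher sees them (leading slash, common prefix stripped), with file names invented for the demonstration:

    import re

    hidden_anywhere  = re.compile(r'/\.')        # hidden files and dirs
    hidden_file_only = re.compile(r'/\.[^/]*$')  # hidden files, not dirs

    for name in ("/file1.ext", "/.profile", "/.ssh/known_hosts"):
        print "%-20s %-5s %s" % (name,
            bool(hidden_anywhere.search(name)),
            bool(hidden_file_only.search(name)))
    ## /file1.ext           False False
    ## /.profile            True  True
    ## /.ssh/known_hosts    True  False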
From: <lu...@us...> - 2008-06-10 23:57:58
Revision: 189 http://s3tools.svn.sourceforge.net/s3tools/?rev=189&view=rev Author: ludvigm Date: 2008-06-10 16:57:56 -0700 (Tue, 10 Jun 2008) Log Message: ----------- * s3cmd: Remove python 2.5 specific code (try/except/finally block) and make s3cmd compatible with python 2.4 again. Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/s3cmd Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-09 14:17:23 UTC (rev 188) +++ s3cmd/trunk/ChangeLog 2008-06-10 23:57:56 UTC (rev 189) @@ -1,3 +1,8 @@ +2008-06-11 Michal Ludvig <mi...@lo...> + + * s3cmd: Remove python 2.5 specific code (try/except/finally + block) and make s3cmd compatible with python 2.4 again. + 2008-06-10 Michal Ludvig <mi...@lo...> * s3cmd: Added --exclude switch for sync. Modified: s3cmd/trunk/s3cmd =================================================================== --- s3cmd/trunk/s3cmd 2008-06-09 14:17:23 UTC (rev 188) +++ s3cmd/trunk/s3cmd 2008-06-10 23:57:56 UTC (rev 189) @@ -414,6 +414,13 @@ attrs[key] = val return attrs + def _try_close_dst_stream(dst_stream): + ## Close the file if still open. Don't care if not. + try: + dst_stream.close() + except: + pass + s3 = S3(Config()) src_uri = S3Uri(src) @@ -484,6 +491,7 @@ os.utime(dst_file, (atime, mtime)) ## FIXME: uid/gid / uname/gname handling comes here! TODO except OSError, e: + _try_close_dst_stream(dst_stream) if e.errno == errno.EEXIST: warning("%s exists - not overwriting" % (dst_file)) continue @@ -492,14 +500,13 @@ continue raise except Exception, e: + _try_close_dst_stream(dst_stream) error("%s: %s" % (file, e)) continue - finally: - ## Close the file if still open. Don't care if not. - try: - dst_stream.close() - except: - pass + # We have to keep repeating this call because + # Python 2.4 doesn't support try/except/finally + # construction :-( + _try_close_dst_stream(dst_stream) except S3DownloadError, e: error("%s: download failed too many times. Skipping that file." % file) continue This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
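For reference, the construct that broke on Python 2.4 and its usual rewrite: a single try with both except and finally clauses only parses on 2.5 and later, but the same semantics can be had by nesting. A runnable sketch with placeholder functions:

    def work(): raise IOError("boom")           # placeholder
    def handle(e): print "handled:", e          # placeholder
    def cleanup(): print "cleaned up"           # placeholder

    ## Python 2.5+ only -- SyntaxError when parsed by 2.4:
    ##   try: work()
    ##   except IOError, e: handle(e)
    ##   finally: cleanup()

    ## 2.4-safe equivalent: try/except nested inside try/finally
    try:
        try:
            work()
        except IOError, e:
            handle(e)
    finally:
        cleanup()

The commit takes the other route and repeats the close call in each handler, trading a little duplication for a flatter loop body.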
From: <lu...@us...> - 2008-06-11 01:43:44
Revision: 190 http://s3tools.svn.sourceforge.net/s3tools/?rev=190&view=rev Author: ludvigm Date: 2008-06-10 18:43:40 -0700 (Tue, 10 Jun 2008) Log Message: ----------- * s3cmd, S3/Config.py, s3cmd.1: Added --exclude-from and --debug-syncmatch switches for sync. Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/S3/Config.py s3cmd/trunk/s3cmd s3cmd/trunk/s3cmd.1 Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-10 23:57:56 UTC (rev 189) +++ s3cmd/trunk/ChangeLog 2008-06-11 01:43:40 UTC (rev 190) @@ -2,6 +2,8 @@ * s3cmd: Remove python 2.5 specific code (try/except/finally block) and make s3cmd compatible with python 2.4 again. + * s3cmd, S3/Config.py, s3cmd.1: Added --exclude-from and --debug-syncmatch + switches for sync. 2008-06-10 Michal Ludvig <mi...@lo...> Modified: s3cmd/trunk/S3/Config.py =================================================================== --- s3cmd/trunk/S3/Config.py 2008-06-10 23:57:56 UTC (rev 189) +++ s3cmd/trunk/S3/Config.py 2008-06-11 01:43:40 UTC (rev 190) @@ -48,7 +48,11 @@ bucket_location = "US" default_mime_type = "binary/octet-stream" guess_mime_type = False + debug_syncmatch = False + # List of compiled REGEXPs exclude = [] + # Dict mapping compiled REGEXPs back to their textual form + debug_exclude = {} ## Creating a singleton def __new__(self, configfile = None): Modified: s3cmd/trunk/s3cmd =================================================================== --- s3cmd/trunk/s3cmd 2008-06-10 23:57:56 UTC (rev 189) +++ s3cmd/trunk/s3cmd 2008-06-11 01:43:40 UTC (rev 190) @@ -359,29 +359,37 @@ def _compare_filelists(src_list, dst_list, src_is_local_and_dst_is_remote): output("Verifying checksums...") + cfg = Config() exists_list = {} exclude_list = {} + if cfg.debug_syncmatch: + logging.root.setLevel(logging.DEBUG) for file in src_list.keys(): - debug("Checking %s ..." % file) + if not cfg.debug_syncmatch: + debug("CHECK: %s" % (os.sep + file)) excluded = False - for r in Config().exclude: + for r in cfg.exclude: ## all paths start with '/' from the base dir if r.search(os.sep + file): ## Can't directly 'continue' to the outer loop ## therefore this awkward excluded switch :-( excluded = True + if cfg.debug_syncmatch: + debug("EXCL: %s" % (os.sep + file)) + debug("RULE: '%s'" % (cfg.debug_exclude[r])) + else: + info("%s: excluded" % file) break if excluded: - info("%s: excluded" % file) exclude_list = src_list[file] del(src_list[file]) continue - + else: + debug("PASS: %s" % (os.sep + file)) if dst_list.has_key(file): - debug("%s exists in remote list" % file) ## Check size first if dst_list[file]['size'] == src_list[file]['size']: - debug("%s same size: %s" % (file, dst_list[file]['size'])) + #debug("%s same size: %s" % (file, dst_list[file]['size'])) ## ... same size, check MD5 if src_is_local_and_dst_is_remote: src_md5 = Utils.hash_file_md5(src_list[file]['full_name']) @@ -390,20 +398,24 @@ src_md5 = src_list[file]['md5'] dst_md5 = Utils.hash_file_md5(dst_list[file]['full_name']) if src_md5 == dst_md5: - debug("%s md5 matches: %s" % (file, dst_md5)) + #debug("%s md5 matches: %s" % (file, dst_md5)) ## Checksums are the same. ## Remove from source-list, all that is left there will be transferred - debug("%s removed from source list - transfer not needed" % file) + debug("IGNR: %s (transfer not needed: MD5 OK, Size OK)" % file) exists_list[file] = src_list[file] del(src_list[file]) else: - debug("! 
%s md5 mismatch: src=%s dst=%s" % (file, src_md5, dst_md5)) + debug("XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5)) else: - debug("! %s size mismatch: src=%s dst=%s" % (file, src_list[file]['size'], dst_list[file]['size'])) + debug("XFER: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size'])) ## Remove from destination-list, all that is left there will be deleted - debug("%s removed from destination list" % file) + #debug("%s removed from destination list" % file) del(dst_list[file]) + if cfg.debug_syncmatch: + warning("Exiting because of --debug-syncmatch") + sys.exit(0) + return src_list, dst_list, exists_list, exclude_list def cmd_sync_remote2local(src, dst): @@ -414,13 +426,6 @@ attrs[key] = val return attrs - def _try_close_dst_stream(dst_stream): - ## Close the file if still open. Don't care if not. - try: - dst_stream.close() - except: - pass - s3 = S3(Config()) src_uri = S3Uri(src) @@ -491,7 +496,8 @@ os.utime(dst_file, (atime, mtime)) ## FIXME: uid/gid / uname/gname handling comes here! TODO except OSError, e: - _try_close_dst_stream(dst_stream) + try: dst_stream.close() + except: pass if e.errno == errno.EEXIST: warning("%s exists - not overwriting" % (dst_file)) continue @@ -499,14 +505,21 @@ warning("%s not writable: %s" % (dst_file, e.strerror)) continue raise + except KeyboardInterrupt: + try: dst_stream.close() + except: pass + warning("Exiting after keyboard interrupt") + return except Exception, e: - _try_close_dst_stream(dst_stream) + try: dst_stream.close() + except: pass error("%s: %s" % (file, e)) continue # We have to keep repeating this call because # Python 2.4 doesn't support try/except/finally # construction :-( - _try_close_dst_stream(dst_stream) + try: dst_stream.close() + except: pass except S3DownloadError, e: error("%s: download failed too many times. Skipping that file." % file) continue @@ -865,7 +878,8 @@ optparser.add_option("-p", "--preserve", dest="preserve_attrs", action="store_true", help="Preserve filesystem attributes (mode, ownership, timestamps). Default for [sync] command.") optparser.add_option( "--no-preserve", dest="preserve_attrs", action="store_false", help="Don't store FS attributes") optparser.add_option( "--exclude", dest="exclude", action="append", metavar="REGEXP", help="Filenames and paths matching REGEXP will be excluded from sync") - #optparser.add_option( "--exclude-from", dest="exclude_from", action="append", metavar="FILE", help="Read --exclude REGEXPs from FILE") + optparser.add_option( "--exclude-from", dest="exclude_from", action="append", metavar="FILE", help="Read --exclude REGEXPs from FILE") + optparser.add_option( "--debug-syncmatch", dest="debug_syncmatch", action="store_true", help="Output detailed information about remote vs. local filelist matching and then exit") optparser.add_option( "--bucket-location", dest="bucket_location", help="Datacentre to create bucket in. 
Either EU or US (default)") @@ -924,9 +938,28 @@ ## Some Config() options are not settable from command line pass - for ex in options.exclude: - cfg.exclude.append(re.compile(ex)) + if options.exclude is None: + options.exclude = [] + if options.exclude_from: + for exf in options.exclude_from: + debug("processing --exclude-from %s" % exf) + exfi = open(exf, "rt") + for ex in exfi: + ex = ex.strip() + if re.match("^#", ex) or re.match("^\s*$", ex): + continue + debug("adding rule: %s" % ex) + options.exclude.append(ex) + + if options.exclude: + for ex in options.exclude: + debug("processing rule: %s" % ex) + exc = re.compile(ex) + cfg.exclude.append(exc) + if options.debug_syncmatch: + cfg.debug_exclude[exc] = ex + if cfg.encrypt and cfg.gpg_passphrase == "": error("Encryption requested but no passphrase set in config file.") error("Please re-run 's3cmd --configure' and supply it.") Modified: s3cmd/trunk/s3cmd.1 =================================================================== --- s3cmd/trunk/s3cmd.1 2008-06-10 23:57:56 UTC (rev 189) +++ s3cmd/trunk/s3cmd.1 2008-06-11 01:43:40 UTC (rev 190) @@ -92,8 +92,8 @@ \fB\-\-exclude\-from FILE\fR Same as \-\-exclude but reads REGEXPs from the given FILE instead of expecting them on the command line. .TP -\fB\-\-debug\-exclude\fR -Display detailed information about matching file names against exclude\-rules. +\fB\-\-debug\-syncmatch\fR +Display detailed information about matching file names against exclude\-rules as well as information about remote vs local filelists matching. S3cmd exits after performing the match and no actual transfer takes place. .\".TP .\"\fB\-n\fR, \fB\-\-dry\-run\fR .\"Only show what would be uploaded or downloaded but don't actually do it. May still perform S3 requests to get bucket listings and other information though. @@ -178,8 +178,8 @@ s3://test-bucket/backup/ (note the trailing slash) on the command line. Both \fB\-\-exclude\fR and \fB\-\-exclude\-from\fR options expect regular expressions, not -shell-style wildcards! Run s3cmd with \fB\-\-debug\-exclude\fR to get a detailed list of -matching file names against exclude rules. +shell-style wildcards! Run s3cmd with \fB\-\-debug\-syncmatch\fR to get detailed information +about matching file names against exclude rules. For example to exclude all files with ".bin" extension use: .PP This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
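The --exclude-from handling reads one pattern per line, skipping '#' comments and blank lines. A standalone sketch of the same parsing rules (the demo file name is made up):

    import re

    def read_patterns(filename):
        patterns = []
        for line in open(filename, "rt"):
            line = line.strip()
            ## skip comments and blank lines, as the diff does
            if re.match("^#", line) or re.match("^\s*$", line):
                continue
            patterns.append(line)
        return patterns

    ## demo file: one pattern per line, '#' comments allowed
    open("excludes.txt", "wt").write("# editor backups\n~$\n\\.swp$\n")
    print read_patterns("excludes.txt")    # ['~$', '\\.swp$']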
From: <lu...@us...> - 2008-06-11 02:22:02
Revision: 191 http://s3tools.svn.sourceforge.net/s3tools/?rev=191&view=rev Author: ludvigm Date: 2008-06-10 19:21:59 -0700 (Tue, 10 Jun 2008) Log Message: ----------- * S3/PkgInfo.py: Version 0.9.8-rc1 Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/S3/PkgInfo.py Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-11 01:43:40 UTC (rev 190) +++ s3cmd/trunk/ChangeLog 2008-06-11 02:21:59 UTC (rev 191) @@ -1,5 +1,9 @@ 2008-06-11 Michal Ludvig <mi...@lo...> + * S3/PkgInfo.py: Version 0.9.8-rc1 + +2008-06-11 Michal Ludvig <mi...@lo...> + * s3cmd: Remove python 2.5 specific code (try/except/finally block) and make s3cmd compatible with python 2.4 again. * s3cmd, S3/Config.py, s3cmd.1: Added --exclude-from and --debug-syncmatch Modified: s3cmd/trunk/S3/PkgInfo.py =================================================================== --- s3cmd/trunk/S3/PkgInfo.py 2008-06-11 01:43:40 UTC (rev 190) +++ s3cmd/trunk/S3/PkgInfo.py 2008-06-11 02:21:59 UTC (rev 191) @@ -1,5 +1,5 @@ package = "s3cmd" -version = "0.9.7" +version = "0.9.8-rc1" url = "http://s3tools.logix.cz" license = "GPL version 2" short_description = "S3cmd is a tool for managing Amazon S3 storage space." This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lu...@us...> - 2008-06-11 15:06:19
Revision: 192 http://s3tools.svn.sourceforge.net/s3tools/?rev=192&view=rev Author: ludvigm Date: 2008-06-11 08:06:15 -0700 (Wed, 11 Jun 2008) Log Message: ----------- * s3cmd, s3cmd.1: Added GLOB (shell-style wildcard) exclude, renamed orig regexp-style --exclude to --rexclude Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/s3cmd s3cmd/trunk/s3cmd.1 Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-11 02:21:59 UTC (rev 191) +++ s3cmd/trunk/ChangeLog 2008-06-11 15:06:15 UTC (rev 192) @@ -1,3 +1,8 @@ +2008-06-12 Michal Ludvig <mi...@lo...> + + * s3cmd, s3cmd.1: Added GLOB (shell-style wildcard) exclude, renamed + orig regexp-style --exclude to --rexclude + 2008-06-11 Michal Ludvig <mi...@lo...> * S3/PkgInfo.py: Version 0.9.8-rc1 Modified: s3cmd/trunk/s3cmd =================================================================== --- s3cmd/trunk/s3cmd 2008-06-11 02:21:59 UTC (rev 191) +++ s3cmd/trunk/s3cmd 2008-06-11 15:06:15 UTC (rev 192) @@ -12,6 +12,7 @@ import re import errno import pwd, grp +import glob from copy import copy from optparse import OptionParser, Option, OptionValueError, IndentedHelpFormatter @@ -806,6 +807,15 @@ error("Writing config file failed: %s: %s" % (config_file, e.strerror)) sys.exit(1) +def process_exclude_from_file(exf, exclude_array): + exfi = open(exf, "rt") + for ex in exfi: + ex = ex.strip() + if re.match("^#", ex) or re.match("^\s*$", ex): + continue + debug("adding rule: %s" % ex) + exclude_array.append(ex) + commands = {} commands_list = [ {"cmd":"mb", "label":"Make bucket", "param":"s3://BUCKET", "func":cmd_bucket_create, "argc":1}, @@ -877,8 +887,10 @@ optparser.add_option( "--no-delete-removed", dest="delete_removed", action="store_false", help="Don't delete remote objects.") optparser.add_option("-p", "--preserve", dest="preserve_attrs", action="store_true", help="Preserve filesystem attributes (mode, ownership, timestamps). Default for [sync] command.") optparser.add_option( "--no-preserve", dest="preserve_attrs", action="store_false", help="Don't store FS attributes") - optparser.add_option( "--exclude", dest="exclude", action="append", metavar="REGEXP", help="Filenames and paths matching REGEXP will be excluded from sync") - optparser.add_option( "--exclude-from", dest="exclude_from", action="append", metavar="FILE", help="Read --exclude REGEXPs from FILE") + optparser.add_option( "--exclude", dest="exclude", action="append", metavar="GLOB", help="Filenames and paths matching GLOB will be excluded from sync") + optparser.add_option( "--exclude-from", dest="exclude_from", action="append", metavar="FILE", help="Read --exclude GLOBs from FILE") + optparser.add_option( "--rexclude", dest="rexclude", action="append", metavar="REGEXP", help="Filenames and paths matching REGEXP (regular expression) will be excluded from sync") + optparser.add_option( "--rexclude-from", dest="rexclude_from", action="append", metavar="FILE", help="Read --rexclude REGEXPs from FILE") optparser.add_option( "--debug-syncmatch", dest="debug_syncmatch", action="store_true", help="Output detailed information about remote vs. local filelist matching and then exit") optparser.add_option( "--bucket-location", dest="bucket_location", help="Datacentre to create bucket in. 
Either EU or US (default)") @@ -938,23 +950,35 @@ ## Some Config() options are not settable from command line pass + ## Process GLOB (shell wildcard style) excludes if options.exclude is None: options.exclude = [] if options.exclude_from: for exf in options.exclude_from: debug("processing --exclude-from %s" % exf) - exfi = open(exf, "rt") - for ex in exfi: - ex = ex.strip() - if re.match("^#", ex) or re.match("^\s*$", ex): - continue - debug("adding rule: %s" % ex) - options.exclude.append(ex) + process_exclude_from_file(exf, options.exclude) if options.exclude: for ex in options.exclude: debug("processing rule: %s" % ex) + exc = re.compile(glob.fnmatch.translate(ex)) + cfg.exclude.append(exc) + if options.debug_syncmatch: + cfg.debug_exclude[exc] = ex + + ## Process REGEXP style excludes + if options.rexclude is None: + options.rexclude = [] + + if options.rexclude_from: + for exf in options.rexclude_from: + debug("processing --rexclude-from %s" % exf) + process_exclude_from_file(exf, options.rexclude) + + if options.rexclude: + for ex in options.rexclude: + debug("processing rule: %s" % ex) exc = re.compile(ex) cfg.exclude.append(exc) if options.debug_syncmatch: Modified: s3cmd/trunk/s3cmd.1 =================================================================== --- s3cmd/trunk/s3cmd.1 2008-06-11 02:21:59 UTC (rev 191) +++ s3cmd/trunk/s3cmd.1 2008-06-11 15:06:15 UTC (rev 192) @@ -86,12 +86,18 @@ \fB\-\-no\-preserve\fR Don't store filesystem attributes with uploaded files. .TP -\fB\-\-exclude REGEXP\fR -Exclude files matching REGEXP from \fIsync\fI. See SYNC COMMAND section for more information. +\fB\-\-exclude GLOB\fR +Exclude files matching GLOB (a.k.a. shell-style wildcard) from \fIsync\fI. See SYNC COMMAND section for more information. .TP \fB\-\-exclude\-from FILE\fR -Same as \-\-exclude but reads REGEXPs from the given FILE instead of expecting them on the command line. +Same as \-\-exclude but reads GLOBs from the given FILE instead of expecting them on the command line. .TP +\fB\-\-rexclude REGEXP\fR +Same as \-\-exclude but works with REGEXPs (Regular expressions). +.TP +\fB\-\-rexclude\-from FILE\fR +Same as \-\-exclude\-from but works with REGEXPs. +.TP \fB\-\-debug\-syncmatch\fR Display detailed information about matching file names against exclude\-rules as well as information about remote vs local filelists matching. S3cmd exits after performing the match and no actual transfer takes place. .\".TP @@ -177,21 +183,26 @@ slash regardless whether you specified s3://test-bucket/backup or s3://test-bucket/backup/ (note the trailing slash) on the command line. -Both \fB\-\-exclude\fR and \fB\-\-exclude\-from\fR options expect regular expressions, not -shell-style wildcards! Run s3cmd with \fB\-\-debug\-syncmatch\fR to get detailed information +Both \fB\-\-exclude\fR and \fB\-\-exclude\-from\fR work with shell-style wildcards (a.k.a. GLOB). +For a greater flexibility s3cmd provides Regular-expression versions of the two exclude options +named \fB\-\-rexclude\fR and \fB\-\-rexclude\-from\fR. + +Run s3cmd with \fB\-\-debug\-syncmatch\fR to get detailed information about matching file names against exclude rules. -For example to exclude all files with ".bin" extension use: +For example to exclude all files with ".bin" extension with a REGEXP use: .PP - \-\-exclude '\.bin$' + \-\-rexclude '\.bin$' .PP -to exclude all hidden files and subdirectories (i.e. those whose name begins with dot ".") use: +to exclude all hidden files and subdirectories (i.e. 
those whose name begins with dot ".") use GLOB: .PP - \-\-exclude '/\.' + \-\-exclude '/.*' .PP -on the other hand to exclude only hidden files but not hidden subdirectories use: +on the other hand to exclude only hidden files but not hidden subdirectories use REGEXP: .PP - \-\-exclude '/\.[^/]*$' + \-\-rexclude '/\.[^/]*$' +.PP +etc... .SH AUTHOR Written by Michal Ludvig <mi...@lo...> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
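The GLOB support is a thin layer over the existing regexp machinery: fnmatch.translate() turns a shell wildcard into an equivalent regular expression, which then joins cfg.exclude like any --rexclude rule. A quick look at what it produces (the exact translated string varies between Python releases; the matching behaviour does not):

    import re, glob

    rx_text = glob.fnmatch.translate("*.bin")
    ## e.g. '.*\.bin$' on Python 2.4-era interpreters
    rx = re.compile(rx_text)

    print bool(rx.match("/dir123/file2.bin"))   # True
    print bool(rx.match("/dir123/file1.ext"))   # False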
From: <lu...@us...> - 2008-06-11 15:15:59
Revision: 193 http://s3tools.svn.sourceforge.net/s3tools/?rev=193&view=rev Author: ludvigm Date: 2008-06-11 08:15:37 -0700 (Wed, 11 Jun 2008) Log Message: ----------- * S3/PkgInfo.py: Version 0.9.8-rc2 Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/S3/PkgInfo.py Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-11 15:06:15 UTC (rev 192) +++ s3cmd/trunk/ChangeLog 2008-06-11 15:15:37 UTC (rev 193) @@ -1,5 +1,9 @@ 2008-06-12 Michal Ludvig <mi...@lo...> + * S3/PkgInfo.py: Version 0.9.8-rc2 + +2008-06-12 Michal Ludvig <mi...@lo...> + * s3cmd, s3cmd.1: Added GLOB (shell-style wildcard) exclude, renamed orig regexp-style --exclude to --rexclude Modified: s3cmd/trunk/S3/PkgInfo.py =================================================================== --- s3cmd/trunk/S3/PkgInfo.py 2008-06-11 15:06:15 UTC (rev 192) +++ s3cmd/trunk/S3/PkgInfo.py 2008-06-11 15:15:37 UTC (rev 193) @@ -1,5 +1,5 @@ package = "s3cmd" -version = "0.9.8-rc1" +version = "0.9.8-rc2" url = "http://s3tools.logix.cz" license = "GPL version 2" short_description = "S3cmd is a tool for managing Amazon S3 storage space." This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lu...@us...> - 2008-06-17 14:13:41
Revision: 194 http://s3tools.svn.sourceforge.net/s3tools/?rev=194&view=rev Author: ludvigm Date: 2008-06-17 07:13:32 -0700 (Tue, 17 Jun 2008) Log Message: ----------- * S3/S3.py: Bucket name can't contain upper-case letters (S3/DNS limitation). Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/S3/S3.py Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-11 15:15:37 UTC (rev 193) +++ s3cmd/trunk/ChangeLog 2008-06-17 14:13:32 UTC (rev 194) @@ -1,3 +1,7 @@ +2008-06-13 Michal Ludvig <mi...@lo...> + + * S3/S3.py: Bucket name can't contain upper-case letters (S3/DNS limitation). + 2008-06-12 Michal Ludvig <mi...@lo...> * S3/PkgInfo.py: Version 0.9.8-rc2 Modified: s3cmd/trunk/S3/S3.py =================================================================== --- s3cmd/trunk/S3/S3.py 2008-06-11 15:15:37 UTC (rev 193) +++ s3cmd/trunk/S3/S3.py 2008-06-17 14:13:32 UTC (rev 194) @@ -469,8 +469,9 @@ return base64.encodestring(hmac.new(self.config.secret_key, h, sha).digest()).strip() def check_bucket_name(self, bucket): - if re.compile("[^A-Za-z0-9\._-]").search(bucket): - raise ParameterError("Bucket name '%s' contains unallowed characters" % bucket) + invalid = re.compile("([^a-z0-9\._-])").search(bucket) + if invalid: + raise ParameterError("Bucket name '%s' contains disallowed character '%s'. The only supported ones are: lowercase us-ascii letters (a-z), digits (0-9), dot (.), hyphen (-) and underscore (_)." % (bucket, invalid.groups()[0])) if len(bucket) < 3: raise ParameterError("Bucket name '%s' is too short (min 3 characters)" % bucket) if len(bucket) > 255: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
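The tightened check rejects a name on the first disallowed character and says which one it was. A standalone sketch of the same test, raising ValueError here in place of s3cmd's ParameterError:

    import re

    def check_bucket_name(bucket):
        ## lowercase letters, digits, dot, hyphen and underscore only
        invalid = re.compile("([^a-z0-9\._-])").search(bucket)
        if invalid:
            raise ValueError("bucket name %r contains disallowed character %r"
                             % (bucket, invalid.groups()[0]))
        if not 3 <= len(bucket) <= 255:
            raise ValueError("bucket name %r must be 3 to 255 characters" % bucket)
        return True

    check_bucket_name("my-backup.bucket-01")    # passes
    try:
        check_bucket_name("MyBucket")
    except ValueError, e:
        print e    # ... contains disallowed character 'M'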
From: <lu...@us...> - 2008-06-17 14:15:16
Revision: 195 http://s3tools.svn.sourceforge.net/s3tools/?rev=195&view=rev Author: ludvigm Date: 2008-06-17 07:15:12 -0700 (Tue, 17 Jun 2008) Log Message: ----------- * S3/PkgInfo.py: Version 0.9.8-rc3 Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/S3/PkgInfo.py Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-17 14:13:32 UTC (rev 194) +++ s3cmd/trunk/ChangeLog 2008-06-17 14:15:12 UTC (rev 195) @@ -1,5 +1,9 @@ -2008-06-13 Michal Ludvig <mi...@lo...> +2008-06-18 Michal Ludvig <mi...@lo...> + * S3/PkgInfo.py: Version 0.9.8-rc3 + +2008-06-18 Michal Ludvig <mi...@lo...> + * S3/S3.py: Bucket name can't contain upper-case letters (S3/DNS limitation). 2008-06-12 Michal Ludvig <mi...@lo...> Modified: s3cmd/trunk/S3/PkgInfo.py =================================================================== --- s3cmd/trunk/S3/PkgInfo.py 2008-06-17 14:13:32 UTC (rev 194) +++ s3cmd/trunk/S3/PkgInfo.py 2008-06-17 14:15:12 UTC (rev 195) @@ -1,5 +1,5 @@ package = "s3cmd" -version = "0.9.8-rc2" +version = "0.9.8-rc3" url = "http://s3tools.logix.cz" license = "GPL version 2" short_description = "S3cmd is a tool for managing Amazon S3 storage space." This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lu...@us...> - 2008-06-23 03:47:06
Revision: 196 http://s3tools.svn.sourceforge.net/s3tools/?rev=196&view=rev Author: ludvigm Date: 2008-06-22 20:47:01 -0700 (Sun, 22 Jun 2008) Log Message: ----------- * s3cmd: Wrapped all execution in a try/except block to catch all exceptions and ask for a report. Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/s3cmd Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-17 14:15:12 UTC (rev 195) +++ s3cmd/trunk/ChangeLog 2008-06-23 03:47:01 UTC (rev 196) @@ -1,3 +1,8 @@ +2008-06-23 Michal Ludvig <mi...@lo...> + + * s3cmd: Wrapped all execution in a try/except block + to catch all exceptions and ask for a report. + 2008-06-18 Michal Ludvig <mi...@lo...> * S3/PkgInfo.py: Version 0.9.8-rc3 Modified: s3cmd/trunk/s3cmd =================================================================== --- s3cmd/trunk/s3cmd 2008-06-17 14:15:12 UTC (rev 195) +++ s3cmd/trunk/s3cmd 2008-06-23 03:47:01 UTC (rev 196) @@ -13,20 +13,13 @@ import errno import pwd, grp import glob +import traceback from copy import copy from optparse import OptionParser, Option, OptionValueError, IndentedHelpFormatter from logging import debug, info, warning, error from distutils.spawn import find_executable -## Our modules -from S3 import PkgInfo -from S3.S3 import * -from S3.Config import Config -from S3.S3Uri import * -from S3 import Utils -from S3.Exceptions import * - def output(message): print message @@ -855,7 +848,7 @@ else: return "" -if __name__ == '__main__': +def main(): if float("%d.%d" %(sys.version_info[0], sys.version_info[1])) < 2.4: sys.stderr.write("ERROR: Python 2.4 or higher required, sorry.\n") sys.exit(1) @@ -1025,5 +1018,35 @@ error("Parameter problem: " + str(e)) sys.exit(1) - sys.exit(0) +if __name__ == '__main__': + try: + ## Our modules + ## Keep them in try/except block to + ## detect any syntax errors in there + from S3 import PkgInfo + from S3.S3 import * + from S3.Config import Config + from S3.S3Uri import * + from S3 import Utils + from S3.Exceptions import * + main() + sys.exit(0) + except Exception, e: + sys.stderr.write(""" +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + An unexpected error has occurred. + Please report the following lines to: + s3t...@li... +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +""") + sys.stderr.write(traceback.format_exc(sys.exc_info())+"\n") + sys.stderr.write(""" +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + An unexpected error has occurred. + Please report the above lines to: + s3t...@li... +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +""") + sys.exit(1) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
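The shape of the new top-level trap, reduced to its essentials (main() and the banner text are stand-ins for the real ones):

    import sys, traceback

    def main():
        raise RuntimeError("boom")      # placeholder for the real entry point

    def report_and_exit():
        banner = "!" * 41
        sys.stderr.write("%s\n  An unexpected error has occurred.\n"
                         "  Please report the following lines.\n%s\n\n"
                         % (banner, banner))
        sys.stderr.write(traceback.format_exc() + "\n")
        sys.exit(1)

    if __name__ == '__main__':
        try:
            main()
            sys.exit(0)
        except Exception:
            report_and_exit()

Keeping the 'from S3 ...' imports inside the same try block, as the commit does, means even a SyntaxError in one of the S3 modules produces the report banner rather than a bare traceback.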
From: <lu...@us...> - 2008-06-23 04:01:06
Revision: 197 http://s3tools.svn.sourceforge.net/s3tools/?rev=197&view=rev Author: ludvigm Date: 2008-06-22 21:01:04 -0700 (Sun, 22 Jun 2008) Log Message: ----------- * s3cmd: Don't require $HOME env variable to be set. Fixes #2000133 Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/s3cmd Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-23 03:47:01 UTC (rev 196) +++ s3cmd/trunk/ChangeLog 2008-06-23 04:01:04 UTC (rev 197) @@ -1,5 +1,7 @@ 2008-06-23 Michal Ludvig <mi...@lo...> + * s3cmd: Don't require $HOME env variable to be set. + Fixes #2000133 * s3cmd: Wrapped all execution in a try/except block to catch all exceptions and ask for a report. Modified: s3cmd/trunk/s3cmd =================================================================== --- s3cmd/trunk/s3cmd 2008-06-23 03:47:01 UTC (rev 196) +++ s3cmd/trunk/s3cmd 2008-06-23 04:01:04 UTC (rev 197) @@ -862,7 +862,9 @@ optparser = OptionParser(option_class=OptionMimeType, formatter=MyHelpFormatter()) #optparser.disable_interspersed_args() - optparser.set_defaults(config=os.getenv("HOME")+"/.s3cfg") + if os.getenv("HOME"): + optparser.set_defaults(config=os.getenv("HOME")+"/.s3cfg") + optparser.set_defaults(verbosity = default_verbosity) optparser.add_option( "--configure", dest="run_configure", action="store_true", help="Invoke interactive (re)configuration tool.") @@ -916,6 +918,10 @@ sys.exit(0) ## Now finally parse the config file + if not options.config: + error("Can't find a config file. Please use --config option.") + sys.exit(1) + try: cfg = Config(options.config) except IOError, e: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
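With the guard in place the default config path is only set when $HOME exists; environments without it (cron jobs, some daemons) must pass --config explicitly. A condensed sketch of that flow:

    import os, sys

    config = None
    if os.getenv("HOME"):
        config = os.path.join(os.getenv("HOME"), ".s3cfg")
    ## (a --config FILE command-line option would override 'config' here)
    if not config:
        sys.stderr.write("Can't find a config file. Please use --config option.\n")
        sys.exit(1)
    print "using config file:", config

os.path.expanduser("~/.s3cfg") would be another option, since on POSIX it falls back to the password database when $HOME is unset, but the explicit getenv() test keeps the "no config found" case visible instead of guessing.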
From: <lu...@us...> - 2008-06-23 04:46:15
Revision: 198 http://s3tools.svn.sourceforge.net/s3tools/?rev=198&view=rev Author: ludvigm Date: 2008-06-22 21:46:03 -0700 (Sun, 22 Jun 2008) Log Message: ----------- * S3/PkgInfo.py: Bumped up version to 0.9.8 * NEWS: Added 0.9.8 * TODO: Removed completed tasks * s3cmd, s3cmd.1: --debug-exclude is an alias for --debug-syncmatch Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/NEWS s3cmd/trunk/S3/PkgInfo.py s3cmd/trunk/TODO s3cmd/trunk/s3cmd s3cmd/trunk/s3cmd.1 Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-23 04:01:04 UTC (rev 197) +++ s3cmd/trunk/ChangeLog 2008-06-23 04:46:03 UTC (rev 198) @@ -1,5 +1,17 @@ 2008-06-23 Michal Ludvig <mi...@lo...> + * Released version 0.9.8 + ---------------------- + +2008-06-23 Michal Ludvig <mi...@lo...> + + * S3/PkgInfo.py: Bumped up version to 0.9.8 + * NEWS: Added 0.9.8 + * TODO: Removed completed tasks + +2008-06-23 Michal Ludvig <mi...@lo...> + + * s3cmd, s3cmd.1: --debug-exclude is an alias for --debug-syncmatch * s3cmd: Don't require $HOME env variable to be set. Fixes #2000133 * s3cmd: Wrapped all execution in a try/except block Modified: s3cmd/trunk/NEWS =================================================================== --- s3cmd/trunk/NEWS 2008-06-23 04:01:04 UTC (rev 197) +++ s3cmd/trunk/NEWS 2008-06-23 04:46:03 UTC (rev 198) @@ -1,6 +1,8 @@ -s3cmd 0.9.8 - ???? +s3cmd 0.9.8 - 2008-06-23 =========== -* Added --exclude option for sync command. +* Added --exclude / --rexclude options for sync command. +* Doesn't require $HOME env variable to be set anymore. +* Better checking of bucket names to Amazon S3 rules. s3cmd 0.9.7 - 2008-06-05 =========== Modified: s3cmd/trunk/S3/PkgInfo.py =================================================================== --- s3cmd/trunk/S3/PkgInfo.py 2008-06-23 04:01:04 UTC (rev 197) +++ s3cmd/trunk/S3/PkgInfo.py 2008-06-23 04:46:03 UTC (rev 198) @@ -1,5 +1,5 @@ package = "s3cmd" -version = "0.9.8-rc3" +version = "0.9.8" url = "http://s3tools.logix.cz" license = "GPL version 2" short_description = "S3cmd is a tool for managing Amazon S3 storage space." Modified: s3cmd/trunk/TODO =================================================================== --- s3cmd/trunk/TODO 2008-06-23 04:01:04 UTC (rev 197) +++ s3cmd/trunk/TODO 2008-06-23 04:46:03 UTC (rev 198) @@ -1,10 +1,6 @@ TODO list for s3cmd project =========================== -- Finish sync --exclude support. - - Add --exclude-from and --debug-exclude implementations. - - Support shell-style wildcards? Or rsync-style excludes? 
- - Treat objects with "/" in their name as directories - Will need local cache for bucket listings - More user friendly 'del' operation that would work Modified: s3cmd/trunk/s3cmd =================================================================== --- s3cmd/trunk/s3cmd 2008-06-23 04:01:04 UTC (rev 197) +++ s3cmd/trunk/s3cmd 2008-06-23 04:46:03 UTC (rev 198) @@ -886,7 +886,7 @@ optparser.add_option( "--exclude-from", dest="exclude_from", action="append", metavar="FILE", help="Read --exclude GLOBs from FILE") optparser.add_option( "--rexclude", dest="rexclude", action="append", metavar="REGEXP", help="Filenames and paths matching REGEXP (regular expression) will be excluded from sync") optparser.add_option( "--rexclude-from", dest="rexclude_from", action="append", metavar="FILE", help="Read --rexclude REGEXPs from FILE") - optparser.add_option( "--debug-syncmatch", dest="debug_syncmatch", action="store_true", help="Output detailed information about remote vs. local filelist matching and then exit") + optparser.add_option( "--debug-syncmatch", "--debug-exclude", dest="debug_syncmatch", action="store_true", help="Output detailed information about remote vs. local filelist matching and --exclude processing and then exit") optparser.add_option( "--bucket-location", dest="bucket_location", help="Datacentre to create bucket in. Either EU or US (default)") Modified: s3cmd/trunk/s3cmd.1 =================================================================== --- s3cmd/trunk/s3cmd.1 2008-06-23 04:01:04 UTC (rev 197) +++ s3cmd/trunk/s3cmd.1 2008-06-23 04:46:03 UTC (rev 198) @@ -98,7 +98,7 @@ \fB\-\-rexclude\-from FILE\fR Same as \-\-exclude\-from but works with REGEXPs. .TP -\fB\-\-debug\-syncmatch\fR +\fB\-\-debug\-syncmatch\fR or \fB\-\-debug\-exclude\fR (alias) Display detailed information about matching file names against exclude\-rules as well as information about remote vs local filelists matching. S3cmd exits after performing the match and no actual transfer takes place. .\".TP .\"\fB\-n\fR, \fB\-\-dry\-run\fR This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
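The --debug-exclude alias costs a single extra string: optparse accepts several option strings in one add_option() call and maps them all to the same destination. A minimal demonstration:

    from optparse import OptionParser

    parser = OptionParser()
    ## several option strings in one call -> one destination
    parser.add_option("--debug-syncmatch", "--debug-exclude",
                      dest="debug_syncmatch", action="store_true",
                      help="verbose filelist matching, then exit")

    options, args = parser.parse_args(["--debug-exclude"])
    print options.debug_syncmatch    # True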
From: <lu...@us...> - 2008-06-23 05:36:21
Revision: 199 http://s3tools.svn.sourceforge.net/s3tools/?rev=199&view=rev Author: ludvigm Date: 2008-06-22 22:36:16 -0700 (Sun, 22 Jun 2008) Log Message: ----------- * s3cmd: Last-minute compatibility fixes for Python 2.4 Modified Paths: -------------- s3cmd/trunk/ChangeLog s3cmd/trunk/s3cmd Modified: s3cmd/trunk/ChangeLog =================================================================== --- s3cmd/trunk/ChangeLog 2008-06-23 04:46:03 UTC (rev 198) +++ s3cmd/trunk/ChangeLog 2008-06-23 05:36:16 UTC (rev 199) @@ -11,6 +11,7 @@ 2008-06-23 Michal Ludvig <mi...@lo...> + * s3cmd: Last-minute compatibility fixes for Python 2.4 * s3cmd, s3cmd.1: --debug-exclude is an alias for --debug-syncmatch * s3cmd: Don't require $HOME env variable to be set. Fixes #2000133 Modified: s3cmd/trunk/s3cmd =================================================================== --- s3cmd/trunk/s3cmd 2008-06-23 04:46:03 UTC (rev 198) +++ s3cmd/trunk/s3cmd 2008-06-23 05:36:16 UTC (rev 199) @@ -554,7 +554,8 @@ for k in attrs: result += "%s:%s/" % (k, attrs[k]) return { 'x-amz-meta-s3cmd-attrs' : result[:-1] } - s3 = S3(Config()) + cfg = Config() + s3 = S3(cfg) src_uri = S3Uri(src) dst_uri = S3Uri(dst) @@ -1038,6 +1039,9 @@ main() sys.exit(0) + except SystemExit, e: + sys.exit(e.code) + except Exception, e: sys.stderr.write(""" !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
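The separate SystemExit clause matters precisely on the Python 2.4 this commit targets: BaseException only appeared in 2.5, so on 2.4 SystemExit (and KeyboardInterrupt) still derive from Exception, and the catch-all trap from r196 would otherwise turn every sys.exit() into a bug-report banner. A reduced sketch:

    import sys

    def main():
        sys.exit(0)                 # placeholder: a clean, deliberate exit

    try:
        main()
    except SystemExit, e:
        ## On Python <= 2.4 SystemExit is a subclass of Exception,
        ## so it must be handled before the generic clause below
        ## gets a chance to swallow it.
        sys.exit(e.code)
    except Exception:
        sys.stderr.write("An unexpected error has occurred.\n")
        sys.exit(1)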