From: Maurits v. R. <svn...@pl...> - 2011-10-05 14:56:35
|
Author: maurits Date: Wed Oct 5 14:54:59 2011 New Revision: 245110 Modified: buildout/collective.recipe.backup/trunk/README.txt buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/CHANGES.txt buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/README.txt buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/copyblobs.py buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/repozorunner.py Log: Allow restoring the blobs to the specified date as well. Modified: buildout/collective.recipe.backup/trunk/README.txt ============================================================================== --- buildout/collective.recipe.backup/trunk/README.txt (original) +++ buildout/collective.recipe.backup/trunk/README.txt Wed Oct 5 14:54:59 2011 @@ -172,11 +172,8 @@ bin/restore 1972-12-25 -Note that restoring to a specific date does not currently work for -blobstorage, but it should not really matter there, as for new or -changed blobfiles a new file is created, so you would just have a few -extra unused files. So here simply the most recent blobstorage backup -will be restored. +Since version 2.3 this also works for restoring blobs. We simply +restore the directory from the first backup after the specified date. Since version 2.0, the restore scripts ask for confirmation before starting the restore, as this is a potentially dangerous command. Modified: buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/CHANGES.txt ============================================================================== --- buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/CHANGES.txt (original) +++ buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/CHANGES.txt Wed Oct 5 14:54:59 2011 @@ -1,9 +1,7 @@ 2.3 (unreleased) ================ -- Moved the restore date option (first command line argument) from - repozorunner to the main restore, so we can pass that to copyblobs - as well (where it currently is ignored though). +- Allow restoring the blobs to the specified date as well. [maurits] Modified: buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/README.txt ============================================================================== --- buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/README.txt (original) +++ buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/README.txt Wed Oct 5 14:54:59 2011 @@ -1,6 +1,13 @@ Example usage ============= +Some needed imports: + + >>> from datetime import datetime + >>> import os + >>> import sys + >>> import time + Just to isolate some test differences, we run an empty buildout once:: >>> ignore = system(buildout) @@ -47,7 +54,6 @@ mock repozo script that prints the options it is passed (and make it executable). It is horridly unix-specific at the moment. - >>> import sys >>> write('bin', 'repozo', ... "#!%s\nimport sys\nprint ' '.join(sys.argv[1:])" % sys.executable) >>> #write('bin', 'repozo', "#!/bin/sh\necho $*") @@ -95,10 +101,8 @@ INFO: Date restriction: restoring state at 1972-12-25. INFO: Please wait while restoring database file: /sample-buildout/var/backups to /sample-buildout/var/filestorage/Data.fs -Note that restoring to a specific date does not currently work for -blobstorage, but it should not really matter there, as for new or -changed blobfiles a new file is created, so you would just have a few -extra unused files. +Note that restoring a blobstorage to a specific date only works since +release 2.3. We will test that a bit further on. Snapshots @@ -448,8 +452,6 @@ We fake three old backups in all the (snapshot)backup directories to test if the 'keep' parameter is working correctly. - >>> import os - >>> import time >>> next_mod_time = time.time() - 1000 >>> def add_backup(dir, name): # same as in the tests in repozorunner.py ... global next_mod_time @@ -651,8 +653,10 @@ >>> ls('var/blobstoragesnapshots/blobstorage.0') d blobstorage -Let's try that some more. +Let's try that some more, with a second in between so we can more +easily test restoring to a specific time later. + >>> time.sleep(1) >>> write('var', 'blobstorage', 'blob2.txt', "Sample blob 2.") >>> print system('bin/snapshotbackup') --backup -f /sample-buildout/var/filestorage/Data.fs -r /sample-buildout/var/snapshotbackups -F --gzip @@ -678,6 +682,7 @@ Now remove an item: + >>> time.sleep(1) >>> remove('var', 'blobstorage', 'blob2.txt') >>> print system('bin/snapshotbackup') --backup -f /sample-buildout/var/filestorage/Data.fs -r /sample-buildout/var/snapshotbackups -F --gzip @@ -716,6 +721,7 @@ We try again with an extra 'blob': + >>> time.sleep(1) >>> write('var', 'blobstorage', 'blob2.txt', "Sample blob 2.") >>> print system('bin/backup') --backup -f /sample-buildout/var/filestorage/Data.fs -r /sample-buildout/var/backups --gzip @@ -736,7 +742,6 @@ Let's check the inodes of two files, to see if they are the same. Not sure if this works on all operating systems. - >>> import os >>> stat_0 = os.stat('var/blobstoragebackups/blobstorage.0/blobstorage/blob1.txt') >>> stat_1 = os.stat('var/blobstoragebackups/blobstorage.1/blobstorage/blob1.txt') >>> stat_0.st_ino == stat_1.st_ino @@ -770,6 +775,33 @@ INFO: Restoring blobs from /sample-buildout/var/blobstoragebackups to /sample-buildout/var/blobstorage INFO: rsync -a --delete /sample-buildout/var/blobstoragebackups/blobstorage.0/blobstorage /sample-buildout/var <BLANKLINE> + >>> ls('var/blobstorage') + - blob1.txt + - blob2.txt + +Since release 2.3 we can also restore blobs to a specific date/time. + + >>> mod_time_0 = os.path.getmtime('var/blobstoragebackups/blobstorage.0') + >>> mod_time_1 = os.path.getmtime('var/blobstoragebackups/blobstorage.1') + >>> mod_time_0 > mod_time_1 + True + >>> time_string = '-'.join([str(t) for t in datetime.utcfromtimestamp(mod_time_1).timetuple()[:6]]) + >>> print system('bin/restore %s' % time_string, input='yes\n') + --recover -o /sample-buildout/var/filestorage/Data.fs -r /sample-buildout/var/backups -D ... + <BLANKLINE> + This will replace the filestorage (Data.fs). + This will replace the blobstorage. + Are you sure? (yes/No)? + INFO: Date restriction: restoring state at ... + INFO: Please wait while restoring database file: /sample-buildout/var/backups to /sample-buildout/var/filestorage/Data.fs + INFO: Restoring blobs from /sample-buildout/var/blobstoragebackups to /sample-buildout/var/blobstorage + INFO: rsync -a --delete /sample-buildout/var/blobstoragebackups/blobstorage.1/blobstorage /sample-buildout/var + <BLANKLINE> + +The second blob file is now no longer in the blob storage. + + >>> ls('var/blobstorage') + - blob1.txt The snapshotrestore works too:: @@ -784,6 +816,52 @@ INFO: rsync -a --delete /sample-buildout/var/blobstoragesnapshots/blobstorage.0/blobstorage /sample-buildout/var <BLANKLINE> +Check that this fits what is in the most recent snapshot:: + + >>> ls('var/blobstorage') + - blob1.txt + >>> ls('var/blobstoragesnapshots') + d blobstorage.0 + d blobstorage.1 + d blobstorage.2 + >>> ls('var/blobstoragesnapshots/blobstorage.0/blobstorage') + - blob1.txt + >>> ls('var/blobstoragesnapshots/blobstorage.1/blobstorage') + - blob1.txt + - blob2.txt + >>> ls('var/blobstoragesnapshots/blobstorage.2/blobstorage') + - blob1.txt + +Since release 2.3 we can also restore blob snapshots to a specific date/time. + + >>> mod_time_0 = os.path.getmtime('var/blobstoragesnapshots/blobstorage.0') + >>> mod_time_1 = os.path.getmtime('var/blobstoragesnapshots/blobstorage.1') + >>> mod_time_2 = os.path.getmtime('var/blobstoragesnapshots/blobstorage.2') + >>> mod_time_0 > mod_time_1 + True + >>> mod_time_1 > mod_time_2 + True + >>> time_string = '-'.join([str(t) for t in datetime.utcfromtimestamp(mod_time_1).timetuple()[:6]]) + >>> print system('bin/snapshotrestore %s' % time_string, input='yes\n') + --recover -o /sample-buildout/var/filestorage/Data.fs -r /sample-buildout/var/snapshotbackups -D ... + <BLANKLINE> + This will replace the filestorage (Data.fs). + This will replace the blobstorage. + Are you sure? (yes/No)? + INFO: Date restriction: restoring state at ... + INFO: Please wait while restoring database file: /sample-buildout/var/snapshotbackups to /sample-buildout/var/filestorage/Data.fs + INFO: Restoring blobs from /sample-buildout/var/blobstoragesnapshots to /sample-buildout/var/blobstorage + INFO: rsync -a --delete /sample-buildout/var/blobstoragesnapshots/blobstorage.1/blobstorage /sample-buildout/var + <BLANKLINE> + +The second blob file was only in blobstorage snapshot number 1 when we +started and now it is also in the main blobstorage again. + + >>> ls('var/blobstorage') + - blob1.txt + - blob2.txt + + We can tell buildout that we only want to backup blobs or specifically do not want to backup the blobs. Modified: buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/copyblobs.py ============================================================================== --- buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/copyblobs.py (original) +++ buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/copyblobs.py Wed Oct 5 14:54:59 2011 @@ -7,6 +7,7 @@ """ from operator import itemgetter +from datetime import datetime import logging import os import shutil @@ -151,6 +152,57 @@ os.path.join(container, new_name)) +def get_blob_backup_dirs(backup_location): + """Get blob backup dirs from this location. + """ + filenames = os.listdir(backup_location) + logger.debug("Looked up filenames in the target dir: %s found. %r.", + len(filenames), filenames) + backup_dirs = [] + prefix = '' + for filename in filenames: + # We only want directories of the form prefix.X, where X is an + # integer. There should not be anything else, but we like to + # be safe. + full_path = os.path.join(backup_location, filename) + if not os.path.isdir(full_path): + continue + if filename in (os.curdir, os.pardir): + # These should not be listed by os.listdir, but again: we + # like to be safe. + continue + parts = filename.split('.') + if len(parts) != 2: + continue + try: + num = int(parts[1]) + except: + # No number + continue + if prefix: + if parts[0] != prefix: + logger.error( + "Different backup prefixes found in %s (%s, %s). Are you " + "mixing two backups in one directory? For safety we will " + "not cleanup old backups here." % ( + backup_location, prefix, parts[0])) + sys.exit(1) + else: + prefix = parts[0] + mod_time = os.path.getmtime(full_path) + backup_dirs.append((num, mod_time, full_path)) + # We always sort by backup number: + backup_dirs = sorted(backup_dirs, key=itemgetter(0)) + # Check if this is the same as reverse sorting by modification time: + mod_times = sorted(backup_dirs, key=itemgetter(1), reverse=True) + if backup_dirs != mod_times: + logger.warn("Sorting blob backups by number gives other result than " + "reverse sorting by last modification time.") + logger.debug("Found %d blob backups: %r.", len(backup_dirs), + [d[1] for d in backup_dirs]) + return backup_dirs + + def backup_blobs(source, destination, full=False, use_rsync=True, keep=0, keep_blob_days=0): """Copy blobs from source to destination. @@ -361,19 +413,48 @@ be careful with that otherwise you may end up with something like var/blobstorage/blobstorage """ - if date is not None: - logger.info("Date argument to restore blobs ignored: %r", date) if destination.endswith(os.sep): # strip that separator destination = destination[:-len(os.sep)] base_name = os.path.basename(destination) dest_dir = os.path.dirname(destination) - last_source = os.path.join(source, base_name + '.0', base_name) + + # Determine the source (blob backup) that should be restored. + backup_source = None + if date is not None: + # From repozo: specify UTC (not local) time in this format: + # yyyy-mm-dd[-hh[-mm[-ss]]] + # Note that this matches the 2011-10-05-12-12-45.fsz that is created. + try: + date_args = [int(num) for num in date.split('-')] + except: + logger.info("Could not parse date argument to restore blobs: %r", + date) + logger.info("Restoring most recent backup instead.") + else: + target_datetime = datetime(*date_args) + backup_dirs = get_blob_backup_dirs(source) + # We want to find the first backup after the requested + # modification time, so we reverse the order. + backup_dirs.reverse() # Note: this reverses in place. + for num, mod_time, directory in backup_dirs: + backup_time = datetime.utcfromtimestamp(mod_time) + if backup_time >= target_datetime: + backup_source = os.path.join(directory, base_name) + break + if not backup_source: + logger.warn("Could not find backup more recent than %r. Using " + "most recent instead.", date) + + if not backup_source: + # The most recent is the default: + backup_source = os.path.join(source, base_name + '.0', base_name) + # You should end up with something like this: #rsync -a --delete var/blobstoragebackups/blobstorage.0/blobstorage var/ if use_rsync: cmd = 'rsync -a --delete %(source)s %(dest)s' % dict( - source=last_source, + source=backup_source, dest=dest_dir) logger.info(cmd) output = utils.system(cmd) @@ -385,8 +466,8 @@ if os.path.exists(destination): logger.info("Removing %s", destination) shutil.rmtree(destination) - logger.info("Copying %s to %s", last_source, destination) - shutil.copytree(last_source, destination) + logger.info("Copying %s to %s", backup_source, destination) + shutil.copytree(backup_source, destination) def cleanup(backup_location, full=False, keep=0, keep_blob_days=0): @@ -547,46 +628,7 @@ # tampered with. keep = 1 logger.debug("Trying to clean up old backups.") - filenames = os.listdir(backup_location) - logger.debug("Looked up filenames in the target dir: %s found. %r.", - len(filenames), filenames) - backup_dirs = [] - prefix = '' - for filename in filenames: - # We only want directories of the form prefix.X, where X is an - # integer. There should not be anything else, but we like to - # be safe. - full_path = os.path.join(backup_location, filename) - if not os.path.isdir(full_path): - continue - if filename in (os.curdir, os.pardir): - # These should not be listed by os.listdir, but again: we - # like to be safe. - continue - parts = filename.split('.') - if len(parts) != 2: - continue - try: - num = int(parts[1]) - except: - # No number - continue - if prefix: - if parts[0] != prefix: - logger.error( - "Different backup prefixes found in %s (%s, %s). Are you " - "mixing two backups in one directory? For safety we will " - "not cleanup old backups here." % ( - backup_location, prefix, parts[0])) - sys.exit(1) - else: - prefix = parts[0] - mod_time = os.path.getmtime(full_path) - backup_dirs.append((num, mod_time, full_path)) - # We always sort by backup number: - backup_dirs = sorted(backup_dirs, key=itemgetter(0)) - logger.debug("Found %d blob backups: %r.", len(backup_dirs), - [d[1] for d in backup_dirs]) + backup_dirs = get_blob_backup_dirs(backup_location) if full: logger.debug("This is a full backup.") logger.debug("Max number of backups: %d.", keep) Modified: buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/repozorunner.py ============================================================================== --- buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/repozorunner.py (original) +++ buildout/collective.recipe.backup/trunk/src/collective/recipe/backup/repozorunner.py Wed Oct 5 14:54:59 2011 @@ -101,7 +101,7 @@ verbose, as_list=True))) logger.info("Please wait while restoring database file: %s to %s", backup_location, datafs) - os.system(quote_command([repozo] + + result = os.system(quote_command([repozo] + restore_arguments(datafs, backup_location, date, verbose, as_list=True))) logger.debug("Repozo command executed.") |