|
From: <di...@us...> - 2008-03-18 15:47:06
|
Revision: 592
http://safekeep.svn.sourceforge.net/safekeep/?rev=592&view=rev
Author: dimi
Date: 2008-03-18 08:47:01 -0700 (Tue, 18 Mar 2008)
Log Message:
-----------
Frank Crawford <fr...@cr...>
* Added a cleanup option to client and server modes to remove
safekeep LVM snapshots and mounts after a crash or problem.
* Added new communications tag "SCRUB" to do a full remote cleanup.
* Added a warning if there is a mismatch in the communications
protocol minor level.
* Append specific paths (/sbin, /usr/sbin and /usr/local/sbin) to
the client path when run in cleanup mode, to cover any path issues.
* Fixed a couple of issues with passing client exceptions back to
the server, and stripping off excess newlines.
* Add test and abort run on client if there are any existing safekeep
LVM snapshots.
Modified Paths:
--------------
safekeep/trunk/doc/safekeep.txt
safekeep/trunk/safekeep
Modified: safekeep/trunk/doc/safekeep.txt
===================================================================
--- safekeep/trunk/doc/safekeep.txt 2008-03-01 22:34:36 UTC (rev 591)
+++ safekeep/trunk/doc/safekeep.txt 2008-03-18 15:47:01 UTC (rev 592)
@@ -7,13 +7,13 @@
SYNOPSIS
--------
-'safekeep' --server [-q] [-v] [--noemail] [--force] [-c file] <clientid>*
+'safekeep' --server [-q] [-v] [--noemail] [--force] [-c file] [--cleanup] <clientid>*
'safekeep' --keys [-q] [-v] [--noemail] [-c file] [-i file] [--status] [--print] [--deploy] <clientid>*
'safekeep' --list [-q] [-v] [--noemail] [-c file] [--increments] [--parsable-output] [--sizes] [--changed=<time>] [--at-time=<time>] <clientid>*
-'safekeep' --client
+'safekeep' --client [--cleanup]
'safekeep' -h | -V
@@ -40,6 +40,11 @@
Note that the client mode of SafeKeep should never be invoked manually,
this mode is meant to be used only by the server mode of SafeKeep.
+The only exception to this is if run with the `--cleanup` option, which
+is used to remove LVM snapshots and mounts created by Safekeep, after a
+crash or some other failure, without a connection to the server.
+Normally this cleanup would be performed through the server command
+`safekeep --server --cleanup`.
The SSH key management mode is a helper mode for deploying or verifying
the setup of the SSH authentification keys.
@@ -111,6 +116,25 @@
backup directory becomes corrupt, and `rdiff-backup` error
logs tells you to use this option.
+--cleanup::
+ Remove LVM snapshots and mounts left by Safekeep after a
+ crash or other failure. This will also run the standard
+ cleanup processes, such as the removal of any DB dumps, and
+ force a consistency check of the `rdiff-backup` destination
+ directory. This is the preferred cleanup procedure and can
+ be run with no danger of corrupting the system if there is
+ nothing to clean up.
+
+CLIENT OPTIONS
+--------------
+--cleanup::
+ Remove LVM snapshots and mounts left after a crash or other
+ failure from the local system. Unlike the equivalent `--server`
+ option, it does not perform any of the other standard cleanups.
+ This option should only be used when it is not possible to
+ refer to the server, for example, when the network connection
+ to the server is no longer available.
+
KEYS OPTIONS
------------
-i FILE::
Modified: safekeep/trunk/safekeep
===================================================================
--- safekeep/trunk/safekeep 2008-03-01 22:34:36 UTC (rev 591)
+++ safekeep/trunk/safekeep 2008-03-18 15:47:01 UTC (rev 592)
@@ -16,7 +16,7 @@
# along with Safekeep. If not, see <http://www.gnu.org/licenses/>.
from __future__ import generators
-import getopt, os, os.path, popen2, re, sys
+import getopt, os, os.path, popen2, re, sys, fnmatch
import commands, tempfile, time, traceback
import getpass, pwd, xml.dom.minidom
import socket, smtplib
@@ -53,7 +53,7 @@
home_dir = None
base_dir = None
-PROTOCOL = "1.0"
+PROTOCOL = "1.1"
VERSION = "1.0.4"
VEBOSITY_BY_CLASS = {'DBG': 3, 'INFO': 2, 'WARN': 1, 'ERR': 0}
@@ -394,19 +394,48 @@
warn('Unable to remove dump file: %s for database %s because: %s' %
(dump['file'], dump['db'], e))
-def gather_lvm_information(device):
- device = device.replace('/mapper','').replace('-','/')
- (group, volume) = device.split('/')[-2:]
+def lvm_snap_information():
+ (cin, cout) = os.popen4(['lvs', '--separator', ':', '--noheadings'])
+ lines = cout.readlines()
+ cout.close()
+ cin.close()
+ lvms = []
+ for line in lines:
+ if line.count(':') > 3:
+ (volume, group, attr, blah1) = line.lstrip().split(':', 3)
+ if fnmatch.fnmatch(volume, '*_snap_safekeep-*') and attr[0].lower() == 's':
+ lvms.append([volume, group])
+ return lvms
+
+def mount_information(reverse = False):
(cin, cout) = os.popen4('mount')
lines = cout.readlines()
cout.close()
cin.close()
+ mounts = []
+ if reverse:
+ lines.reverse()
for line in lines:
- (device, blah1, mountpoint, blah2, mounttype, blah3) = line.split(' ', 5)
- if line.startswith('/dev/mapper/' + group + '-' + volume + ' '):
+ (device, blah1, mountpoint, blah2, mounttype, mountoptions) = line.split()
+ mounts.append([device, mountpoint, mounttype, mountoptions[1:-1]])
+ return mounts
+
+def map_lvm_device(device):
+ device = device.replace('/mapper','').replace('-','/')
+ return device.split('/')[-2:]
+
+def check_lvm_information(device):
+ (group, volume) = map_lvm_device(device)
+ for (lvm_volume, lvm_group) in lvm_snap_information():
+ if lvm_group == group and lvm_volume.startswith(volume):
+ return True
+ return False
+
+def gather_lvm_information(device):
+ (group, volume) = map_lvm_device(device)
+ for (device, mountpoint, mounttype, mountoptions) in mount_information(False):
+ if [group, volume] == map_lvm_device(device):
return (group, volume, mountpoint, mounttype)
- elif line.startswith('/dev/' + group + '/' + volume + ' '):
- return (group, volume, mountpoint, mounttype)
return (None, None, None, None)
def gather_snap_information(device, bdir):
@@ -486,6 +515,12 @@
do_client_dbdump(cfg)
if len(cfg['snaps']) > 0:
+ debug('Checking FS snapshots')
+ for snap in cfg['snaps']:
+ device = snap['device']
+ if check_lvm_information(device):
+ raise Exception("Previous snapshots found for %s: run 'safekeep --server --cleanup' to correct" % device)
+
ret = spawn(['modprobe', 'dm-snapshot'])
if ret:
warn('modprobe dm-snapshot failed, continuing')
@@ -525,6 +560,76 @@
def do_client_compat(server_versions):
debug('Server versions: %s' % server_versions)
+def do_client_scrub():
+ debug("Do client scrub loop")
+
+ if os.getuid():
+ if is_client:
+ raise Exception('client not running as root')
+ else:
+ error("--cleanup must be run as root")
+ sys.exit(2)
+
+ scrubbed = False
+ if os.environ['PATH'][-1] == ':':
+ os.environ['PATH'] += '/sbin:/usr/sbin:/usr/local/sbin:'
+ else:
+ os.environ['PATH'] += ':/sbin:/usr/sbin:/usr/local/sbin'
+
+ # Go through and unmount anythings that are still hanging around
+
+ debug("Cleaning up existing mounts")
+ for (device, mountpoint, mounttype, mountoptions) in mount_information(True):
+ if mountpoint.startswith('/mnt/safekeep-'):
+ info("Removing mount %s" % mountpoint)
+ if device == '/' and 'bind' in mountoptions.split(','):
+ info("Removing rbind directory %s" % mountpoint)
+ ret = spawn(['umount', '-l', mountpoint])
+ if ret:
+ warn('Failed to unmount: ' + mountpoint)
+ else:
+ try:
+ os.rmdir(mountpoint)
+ except Exception, e:
+ warn('Failed to remove: ' + mountpoint)
+ else:
+ ret = spawn(['umount', mountpoint])
+ if ret:
+ warn('Can not unmount the snapshot: %s' % mountpoint)
+ if fnmatch.fnmatch(device, '*_snap_safekeep-*'):
+ info("Removing snapshot %s" % device)
+ ret = spawn(['lvremove', '--force', device])
+ if ret:
+ warn('Can not tear down snapshot: ' + device)
+ scrubbed = True
+
+ # Now cleanup any snapshots still hanging around
+
+ debug("Cleaning up remaining snapshots")
+ for (volume, group) in lvm_snap_information():
+ device = os.path.join('/dev', group, volume)
+ info("Removing snapshot %s" % device)
+ ret = spawn(['lvremove', '--force', device])
+ if ret:
+ warn('Can not tear down snapshot: ' + device)
+ scrubbed = True
+
+ # Now cleanup any safekeep directories still hanging around
+
+ debug("Cleaning up remaining safekeep directories")
+ if os.path.isdir('/mnt'):
+ for ent in os.listdir('/mnt'):
+ mountpoint = os.path.join('/mnt', ent)
+ if ent.startswith('safekeep-') and os.path.isdir(mountpoint):
+ info("Removing rbind directory %s" % mountpoint)
+ try:
+ os.rmdir(mountpoint)
+ except Exception, e:
+ warn('Failed to remove: ' + mountpoint)
+
+ if not scrubbed:
+ info('No cleanup required')
+
def do_client():
debug("Do client main loop")
should_cleanup = True
@@ -547,6 +652,9 @@
if dir == bdir: should_cleanup = False
do_client_cleanup(cfg, dir)
send('OK')
+ elif line.startswith('SCRUB'):
+ do_client_scrub()
+ send('OK')
elif not line:
break
else:
@@ -554,7 +662,7 @@
break
except Exception, e:
traceback.print_exc(file=sys.stdout)
- send('ERROR ' + e)
+ send('ERROR %s' % e)
finally:
if should_cleanup:
do_client_cleanup(cfg, bdir)
@@ -570,7 +678,7 @@
if line.startswith('OK'):
return line[2:-1].strip()
elif line.startswith('ERROR'):
- raise Exception(line[5:])
+ raise Exception(line[5:].strip())
elif not line:
raise Exception('client died unexpectedly')
else:
@@ -604,6 +712,12 @@
if ret:
raise Exception('Failed to run rdiff-backup')
+def do_server_rdiff_cleanup(cfg):
+ args = ['rdiff-backup', '--check-destination-dir', cfg['dir']]
+ ret = spawn(args)
+ if ret:
+ warn('Failed to cleanup old data, please fix the problem manually')
+
def do_server_data_cleanup(cfg):
args = ['rdiff-backup', '--force', '--remove-older-than', cfg['retention'], cfg['dir']]
ret = spawn(args)
@@ -616,8 +730,10 @@
(server_major, server_minor) = PROTOCOL.split('.')
if server_major != client_major:
raise Exception('Incompatible protocols: %s <> %s' % (PROTOCOL, client_protocol))
+ elif server_minor > client_minor:
+ warn('Protocol mismatch: %s <> %s' % (PROTOCOL, client_protocol))
-def do_server(cfgs, ids, force):
+def do_server(cfgs, ids, force, cleanup):
debug("Do server main loop")
for cfg in cfgs.itervalues():
id = cfg['id']
@@ -640,7 +756,7 @@
raise Exception('Can not create data store dir: %s' % datadir)
rdiff_logdir = os.path.join(datadir, 'rdiff-backup-data')
- if cfg['retention'] and os.path.isdir(rdiff_logdir):
+ if cfg['retention'] and os.path.isdir(rdiff_logdir) and not cleanup:
do_server_data_cleanup(cfg)
if cfg['host']:
@@ -660,36 +776,44 @@
cin.flush()
do_server_getanswer(cout)
- cin.write('SETUP\n')
- cin.flush()
- bdir = do_server_getanswer(cout)
-
- if os.path.isdir(rdiff_logdir):
- rdiff_logpre = os.listdir(rdiff_logdir)
+ if cleanup:
+ cin.write('SCRUB\n')
+ cin.flush()
+ do_server_getanswer(cout)
+ bdir = '/' # Fake directory for the rest of the cleanup
+ do_server_rdiff_cleanup(cfg)
+ errs = 0
else:
- rdiff_logpre = []
+ cin.write('SETUP\n')
+ cin.flush()
+ bdir = do_server_getanswer(cout)
- backup_log = os.path.join(rdiff_logdir, 'backup.log')
- if os.path.isfile(backup_log):
- backup_marker = '=== Backup session on %s ===' % time.asctime()
- fbm = open(backup_log, 'a')
- fbm.write(backup_marker + '\n')
- fbm.close()
- else:
- backup_marker = None
+ if os.path.isdir(rdiff_logdir):
+ rdiff_logpre = os.listdir(rdiff_logdir)
+ else:
+ rdiff_logpre = []
- do_server_rdiff(cfg, bdir, force)
+ backup_log = os.path.join(rdiff_logdir, 'backup.log')
+ if os.path.isfile(backup_log):
+ backup_marker = '=== Backup session on %s ===' % time.asctime()
+ fbm = open(backup_log, 'a')
+ fbm.write(backup_marker + '\n')
+ fbm.close()
+ else:
+ backup_marker = None
- errs = 0
- if os.path.isdir(rdiff_logdir):
- info_file(backup_log, backup_marker)
- rdiff_logpost = os.listdir(rdiff_logdir)
- for lfn in rdiff_logpost:
- if lfn.startswith('session_statistics.') and lfn.endswith('.data') and lfn not in rdiff_logpre:
- errs += info_file(os.path.join(rdiff_logdir, lfn))
- else:
- warn('Log dir does not exist.')
+ do_server_rdiff(cfg, bdir, force)
+ errs = 0
+ if os.path.isdir(rdiff_logdir):
+ info_file(backup_log, backup_marker)
+ rdiff_logpost = os.listdir(rdiff_logdir)
+ for lfn in rdiff_logpost:
+ if lfn.startswith('session_statistics.') and lfn.endswith('.data') and lfn not in rdiff_logpre:
+ errs += info_file(os.path.join(rdiff_logdir, lfn))
+ else:
+ warn('Log dir does not exist.')
+
cin.write('CLEANUP %s\n' % bdir)
cin.flush()
do_server_getanswer(cout)
@@ -902,6 +1026,7 @@
print
print 'server options:'
print '--force force backup destination overwriting, dangerous!'
+ print '--cleanup perform cleanup actions after a failure'
print
print 'keys options:'
print '-i FILE use FILE as identity for RSA/DSA authentication'
@@ -924,7 +1049,7 @@
'email=', 'force', 'help', 'keys',
'list', 'increments', 'sizes',
'parsable-output', 'changed=', 'at-time=',
- 'noemail',
+ 'noemail', 'cleanup',
'print', 'quiet', 'server', 'smtp=',
'status', 'verbose', 'version'])
except getopt.GetoptError:
@@ -939,6 +1064,7 @@
verbosity = 0
clientid = None
force = 0
+ cleanup = 0
noemail = 0
list_type = None
list_parsable = 0
@@ -981,6 +1107,8 @@
mode = 'keys'
elif o in ('--force', ):
force = 1
+ elif o in ('--cleanup', ):
+ cleanup = 1
elif o in ('--noemail', ):
noemail = 1
elif o in ('--increments', ):
@@ -1027,6 +1155,9 @@
if mode is not 'server' and (email or smtp):
usage(2)
+ if not mode in ['server', 'client'] and cleanup:
+ usage(2)
+
if mode is 'client' and cfglocs:
usage(2)
@@ -1088,7 +1219,7 @@
if mode is 'server':
is_client = False
verbosity_level = 1 + verbosity
- do_server(cfgs, args, force)
+ do_server(cfgs, args, force, cleanup)
elif mode is 'list':
if list_type is None:
list_type = 'increments'
@@ -1096,9 +1227,14 @@
verbosity_level = 2 + verbosity
do_list(cfgs, args, list_type, list_date, list_parsable)
elif mode is 'client':
- is_client = True
- verbosity_level = 3 + verbosity
- do_client()
+ if cleanup:
+ is_client = False
+ verbosity_level = 1 + verbosity
+ do_client_scrub()
+ else:
+ is_client = True
+ verbosity_level = 3 + verbosity
+ do_client()
elif mode is 'keys':
is_client = False
verbosity_level = 1 + verbosity
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|