[Assorted-commits] SF.net SVN: assorted:[1162] ydb/trunk
Brought to you by:
yangzhang
From: <yan...@us...> - 2009-02-03 22:29:59
|
Revision: 1162 http://assorted.svn.sourceforge.net/assorted/?rev=1162&view=rev Author: yangzhang Date: 2009-02-03 22:29:55 +0000 (Tue, 03 Feb 2009) Log Message: ----------- - Print out the raw data tables. - Added default value lookups to deal with unreliably funneled output. - Fixed the parsing loop to understand the significance of === markers. - Graphs are named after the real filenames of the logs they're generated from. - Added WAL benchmark. - Updated the scaling analysis to include the WAL results. - Added a mtcp benchmark. - Renamed run to rec. - Added --yield-build-up to alleviate the large distortion in recv times (though this greatly inflates the build-up times). - Updated README/TODOs. Modified Paths: -------------- ydb/trunk/README ydb/trunk/tools/analysis.py ydb/trunk/tools/test.bash Modified: ydb/trunk/README =================================================================== --- ydb/trunk/README 2009-02-03 22:24:59 UTC (rev 1161) +++ ydb/trunk/README 2009-02-03 22:29:55 UTC (rev 1162) @@ -251,12 +251,20 @@ Period: 1/27-2/3 +- DONE associative containers benchmark +- DONE parallel tcp benchmark - DONE simple wal +- issues: + - multi vs single + - WAL performs well + - what to do? limit parallelism? how? 
Period: 2/3- -- DONE better wal +- TODO better wal +- TODO better understand multihost recovery - TODO fix up analysis of multihost recovery +- TODO data structures benchmark - TODO implement checkpointing disk-based scheme - TODO implement log-based recovery; show that it sucks - TODO implement group (batch) commit for log-based recovery Modified: ydb/trunk/tools/analysis.py =================================================================== --- ydb/trunk/tools/analysis.py 2009-02-03 22:24:59 UTC (rev 1161) +++ ydb/trunk/tools/analysis.py 2009-02-03 22:29:55 UTC (rev 1162) @@ -2,6 +2,7 @@ from __future__ import with_statement import re, sys, itertools, colorsys +from path import path from os.path import basename, realpath from pylab import * @@ -28,9 +29,12 @@ def show_table1(dicts): keys = dicts[0].keys() - return show_table([(k, [d[k] for d in dicts]) for k in keys]) + # TODO: Remove the default arg once we have reliably funneled output. + return show_table([(k, [d.get(k, dicts[0][k]) for d in dicts]) + for k in keys]) -def logextract(path, indexkey, pats): +def logextract(path, indexkey, pats, xform = None): + if xform is None: xform = lambda x: x check(path) # Capture values from log using regex pats. def getcaps(): @@ -38,6 +42,7 @@ caps = {} # captures: name -> int/float sats = [ False for pat in pats ] for line in f: + if line.startswith('=== '): print line,; caps = {}; sats = [False for pat in pats] # if line == '\n': print '===', caps.keys(), ''.join('1' if s else '0' for s in sats) for i, pat in enumerate(pats): m = re.search(pat, line) @@ -51,17 +56,18 @@ if all(sats): sats = [ False for pat in pats ] # print '!!!' - yield caps.copy() # [ caps[k] for k in keys ] - caps.clear() + yield xform(caps) + caps = {} # Aggregate the captured values. 
caps = list(getcaps()) -# print show_table1(caps) + print show_table1(caps) + caps = sorted(caps, key = lambda d: d[indexkey]) keys = [indexkey] + filter(lambda x: x != indexkey, caps[0].keys()) def gen(): for index, ds in itertools.groupby(caps, lambda d: d[indexkey]): ds = list(ds) - print [d['len'] for d in ds] - yield [ [d[k] for k in keys] for d in ds ] + # TODO: Remove the default arg once we have reliably funneled output. + yield [ [d.get(k, ds[0][k]) for k in keys] for d in ds ] a = array(list(gen())) # raw results indexes = a[:,0,0] means = median(a,1) # or a.mean(1) @@ -78,33 +84,46 @@ print return res -def scaling(path): +def scaling(scalingpath, ariespath): print '=== scaling ===' - print 'file:', getname(path) - res = logextract(path, 'n', [ - r'=== n=(?P<n>\d+) ', + print 'file:', getname(scalingpath) + res = logextract(scalingpath, 'n', [ + r'=== n=(?P<n>-?\d+) ', r'issued .*\((?P<tps>[.\d]+) tps\)' ]) - errorbar(res['n'], res['tps mean'], res['tps sd']) + print 'file:', getname(ariespath) + res2 = logextract(ariespath, 'n', [ + r'=== n=(?P<n>-?\d+) ', + r'issued .*\((?P<tps>[.\d]+) tps\)' ]) + + errorbar(hstack([res2['n'], res['n']]), + hstack([res2['tps mean'], res['tps mean']]), + hstack([res2['tps sd'], res['tps sd']])) title('Scaling of baseline throughput with number of nodes') xlabel('Node count') ylabel('Mean TPS (stdev error bars)') - xlim(res['n'].min() - .5, res['n'].max() + .5) + xlim(hstack([res2['n'], res['n']]).min() - .5, + hstack([res2['n'], res['n']]).max() + .5) ylim(ymin = 0) savefig('scaling.png') def run(singlepath, multipath): - for path, titlestr, name in [(singlepath, 'single recoverer', 'single'), - (multipath, 'multi recoverer', 'multi')]: + singlepath, multipath = map(path, [singlepath, multipath]) + for datpath, titlestr, name in [(singlepath, 'single recoverer', 'single'), + (multipath, 'multi recoverer', 'multi')]: + def xform(d): + d['realdump'] = d['dump'] - d['recv'] - d['deser'] + return d print '===', titlestr, 
'===' - print 'file:', getname(path) - res = logextract(path, 'seqno', + print 'file:', getname(datpath) + res = logextract(datpath, 'seqno', [ r'=== seqno=(?P<seqno>\d+) ', r'got recovery message of (?P<len>\d+) bytes in (?P<dump>\d+) ms: xfer took (?P<recv>\d+) ms, deserialization took (?P<deser>\d+)', r'built up .* (?P<buildup>\d+) ms', r'generating recovery took (?P<gen>\d+) ms', r'replayer caught up; from backlog replayed \d+ txns .* in (?P<catchup>\d+) ms', - r'.*: recovering node caught up; took (?P<total>\d+) ?ms' ] ) + r'.*: recovering node caught up; took (?P<total>\d+) ?ms' ], + xform ) # Colors and positioning width = 5e4 @@ -123,7 +142,7 @@ ehues.next(), label = label, bottom = self.bottom) self.bottom += res[yskey] - mybar('dump mean', 'dump sd', 'State dump') + mybar('realdump mean', 'realdump sd', 'State dump etc.') mybar('recv mean', 'recv sd', 'State receive') mybar('deser mean', 'deser sd', 'State deserialization') mybar('buildup mean', 'buildup sd', 'Build-up') @@ -141,17 +160,33 @@ ax2.set_ylabel('Size of serialized state (KB)', color = col) ax2.set_ylim(ymin = 0) for tl in ax2.get_yticklabels(): tl.set_color(col) - xlim(xmin = min(res['seqno']) - width, xmax = max(res['seqno']) + width) - savefig(name + '.png') + pngpath = datpath.realpath() + '.png' + savefig(pngpath) + symlink = path(name + '.png') + if symlink.isfile(): symlink.remove() + pngpath.symlink(symlink) + +def mtcp(datpath): + res = logextract(datpath, 'n', + [ r'=== n=(?P<n>\d+)', + r'real\s+0m(?P<t>[0-9\.]+)s' ]) + errorbar(res['n'], res['t mean'], res['t sd']) + title('Time to send a large message (6888896 bytes)') + xlabel('Number of parallel senders') + ylabel('Time (ms)') + savefig('mtcp.png') + def main(argv): if len(argv) <= 1: print >> sys.stderr, 'Must specify a command' elif argv[1] == 'scaling': - scaling(argv[2] if len(argv) > 2 else 'scaling-log') + scaling(*argv[2:] if len(argv) > 2 else ['scaling-log', 'aries-log']) elif argv[1] == 'run': run(*argv[2:] if len(argv) 
> 2 else ['single-log', 'multi-log']) + elif argv[1] == 'mtcp': + mtcp('mtcp-log') else: print >> sys.stderr, 'Unknown command:', argv[1] Modified: ydb/trunk/tools/test.bash =================================================================== --- ydb/trunk/tools/test.bash 2009-02-03 22:24:59 UTC (rev 1161) +++ ydb/trunk/tools/test.bash 2009-02-03 22:29:55 UTC (rev 1162) @@ -232,10 +232,10 @@ scaling-helper() { local leader=$1 shift - tagssh $leader "ydb/src/ydb -l -n $# -X 100000" & + tagssh $leader "ydb/src/ydb -l -n $# -X 100000 ${extraargs:-}" & sleep .1 for rep in "$@" - do tagssh $rep "ydb/src/ydb -n $# -H $leader" & + do tagssh $rep "ydb/src/ydb -n $# -H $leader ${extraargs:-}" & done wait } @@ -249,13 +249,13 @@ # configurations; e.g., "repeat scaling". # TODO: fix this to work also with `hosts`; move into repeat-helper that's run # via hostargs, and change the range= to hosts= -full-scaling() { +exp-scaling() { local out=scaling-log-$(date +%Y-%m-%d-%H:%M:%S-%N) local orighosts="$hosts" maxn=$(( $(echo $hosts | wc -w) - 1 )) ln -sf $out scaling-log - for n in `seq $maxn -1 1` ; do # configurations + for n in `seq $maxn -1 0` ; do # configurations stop - for i in {1..5} ; do # trials + for i in {1..3} ; do # trials echo === n=$n i=$i === echo === n=$n i=$i === > `tty` scaling @@ -269,7 +269,7 @@ hosts="$orighosts" } -run-helper() { +rec-helper() { local leader=$1 shift : ${seqno:=100000} @@ -282,7 +282,7 @@ done sleep .1 # pexpect 'got all \d+ replicas' leader # Run joiner. - tagssh $1 "ydb/src/ydb -H $leader --yield-catch-up ${extraargs:-}" & # -v --debug-threads -t 200000" & + tagssh $1 "ydb/src/ydb -H $leader --yield-build-up --yield-catch-up ${extraargs:-}" & # -v --debug-threads -t 200000" & if false ; then if [[ ${wait2:-} ]] then sleep $wait2 @@ -293,18 +293,18 @@ wait } -run() { - hostargs run-helper +rec() { + hostargs rec-helper } # Recovery experiment. 
-exp() { +exp-rec() { for seqno in 500000 400000 300000 200000 100000 ; do # configurations stop - for i in {1..5} ; do # trials + for i in {1..3} ; do # trials echo === seqno=$seqno i=$i === echo === seqno=$seqno i=$i === > `tty` - run + rec sleep 1 stop sleep .1 @@ -314,19 +314,68 @@ } # Single-host recovery experiment. -exp-single() { +exp-rec-single() { local out=single-log-$(date +%Y-%m-%d-%H:%M:%S) ln -sf $out single-log - exp >& $out + exp-rec >& $out } # Multi-host recovery experiment. -exp-multi() { +exp-rec-multi() { local out=multi-log-$(date +%Y-%m-%d-%H:%M:%S) ln -sf $out multi-log - extraargs="-m ${extraargs:-}" exp >& $out + extraargs="-m ${extraargs:-}" exp-rec >& $out } +# WAL. +aries() { + extraargs='--wal' scaling ${hosts:-} +} + +exp-aries() { + local out=aries-log-$(date +%Y-%m-%d-%H:%M:%S) + ln -sf $out aries-log + for i in {1..3} ; do + echo === n=-1 i=$i === + echo === n=-1 i=$i === > `tty` + aries + echo + done >& $out +} + +mtcp-helper() { + local leader=$1 n=$(( $# - 1 )) + tagssh $leader 'pkill nc' + shift + while (( $# > 0 )) ; do + tagssh $1 "sleep .5 ; time seq $((1000000/n)) | nc $leader 9876" & + shift + done + tagssh $leader "nc -l 9876 > /dev/null" + wait +} + +mtcp() { + hostargs mtcp-helper +} + +exp-mtcp() { + local out=mtcp-log-$(date +%Y-%m-%d-%H:%M:%S-%N) + local orighosts="$hosts" maxn=$(( $(echo $hosts | wc -w) - 1 )) + ln -sf $out mtcp-log + for n in `seq $maxn -1 1` ; do # configurations + for i in {1..3} ; do # trials + echo === n=$n i=$i === + echo === n=$n i=$i === > `tty` + mtcp + sleep 1 + echo + done + hosts="${hosts% *}" + done >& $out + hosts="$orighosts" +} + stop-helper() { tagssh $1 'pkill -sigint ydb' } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |