Thread: [Assorted-commits] SF.net SVN: assorted: [417] numa-bench/trunk
From: <yan...@us...> - 2008-02-15 01:43:47
Revision: 417
          http://assorted.svn.sourceforge.net/assorted/?rev=417&view=rev
Author:   yangzhang
Date:     2008-02-14 17:43:52 -0800 (Thu, 14 Feb 2008)

Log Message:
-----------
never added tools!

Added Paths:
-----------
    numa-bench/trunk/tools/
    numa-bench/trunk/tools/Makefile
    numa-bench/trunk/tools/PlotHist.scala
    numa-bench/trunk/tools/plot-hist.bash

Added: numa-bench/trunk/tools/Makefile
===================================================================
--- numa-bench/trunk/tools/Makefile                             (rev 0)
+++ numa-bench/trunk/tools/Makefile     2008-02-15 01:43:52 UTC (rev 417)
@@ -0,0 +1,12 @@
+COMMONS := $(wildcard commons/*.scala)
+
+all: out/PlotHist.class
+
+out/PlotHist.class: PlotHist.scala $(COMMONS)
+        mkdir -p out
+        fsc -d out $^
+
+clean:
+        rm -rf out
+
+.PHONY: clean

Added: numa-bench/trunk/tools/PlotHist.scala
===================================================================
--- numa-bench/trunk/tools/PlotHist.scala                       (rev 0)
+++ numa-bench/trunk/tools/PlotHist.scala       2008-02-15 01:43:52 UTC (rev 417)
@@ -0,0 +1,90 @@
+import commons.Collections._
+import commons.Control._
+import commons.Io._
+import scala.util._
+object PlotHist {
+  def main(args: Array[String]) {
+    // The input consists of header lines describing the experiment
+    // configuration followed by body lines reporting the time measurements.
+    // Construct a map from configuration to bodies that were run under that
+    // config.
+    val lines = using (TextReader(Console.in)) (_.readLines.toArray)
+    val runs = separateHeads(groupByHeaders(lines)(_ startsWith "config: "))
+    val exps = multimap(
+      for ((config, lines) <- runs) yield {
+        val pairs =
+          for (line <- lines) yield {
+            val Seq(a,b) = line split ": "
+            (a.toInt, b.toInt)
+          }
+        (config.split(": ")(1), pairs)
+      }
+    )
+    // For each config, aggregate the bodies together by finding the average of
+    // all the measurements corresponding to the same core.
+    val graphs = for ((config, vs) <- exps) yield {
+      val vmap = multimap(vs flatMap (x=>x))
+      val agg = for ((x,ys) <- vmap) yield (x,mean(ys.toArray))
+      val arr = agg.toStream.toArray
+      Sorting quickSort arr
+      (config, arr)
+    }
+    // Also generate the scaling view of the data by grouping together by the
+    // first numeric parameter (in this case, the number of cores).
+    val scaling = multimap(
+      for ((config, points) <- graphs) yield {
+        val Seq(_, ncores, rest) = config split (" ", 3)
+        (rest, (ncores.toInt, Iterable.max(points map (_._2))))
+      }
+    )
+    val scalingGraphs = for ((k,vs) <- scaling; if vs.size > 1) yield {
+      val arr = vs.toStream.toArray
+      Sorting quickSort arr
+      (k, arr)
+    }
+    // Prepare the plotting.
+    val cmd = <p>
+      set style data histogram
+      # set style histogram clustered
+      set terminal pdf
+      set xlabel 'core'
+      set ylabel 'time (ms)'
+      set key off
+      {
+        // Generate the histograms.
+        for ((config, points) <- graphs) yield {
+          <p>
+            set title '{config}'
+            set output 'graphs/{spacedToHyphen(config)}.pdf'
+            plot '-' using 2:xticlabel(1)
+            {points map {case (a,b) => (a + " " + b)} mkString "\n"}
+            e
+          </p>.text
+        }
+      }
+      set style data linespoints
+      {
+        // Generate the time and speedup plots varying ncores.
+        for ((config, points) <- scalingGraphs) yield {
+          <p>
+            set title '{config}'
+
+            set output 'graphs/{"scaling-" + spacedToHyphen(config)}.pdf'
+            plot '-'
+            {points map {case (a,b) => (a + " " + b)} mkString "\n"}
+            e
+
+            set output 'graphs/{"speedup-" + spacedToHyphen(config)}.pdf'
+            plot '-'
+            {
+              val (_, base) = points(0)
+              points map {case (a,b) => (a + " " + (base.toDouble/b))
+              } mkString "\n"}
+            e
+          </p>.text
+        }
+      }
+    </p>.text
+    run("gnuplot", cmd)
+  }
+}

Added: numa-bench/trunk/tools/plot-hist.bash
===================================================================
--- numa-bench/trunk/tools/plot-hist.bash                       (rev 0)
+++ numa-bench/trunk/tools/plot-hist.bash       2008-02-15 01:43:52 UTC (rev 417)
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+set -o errexit -o nounset
+make -s
+egrep '^[[:digit:]]+: [[:digit:]]+|config' "$@" | scala -cp out PlotHist

Property changes on: numa-bench/trunk/tools/plot-hist.bash
___________________________________________________________________
Name: svn:executable
   + *
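[Editor's note] PlotHist.scala builds on a small shared `commons` library (Collections, Control, Io) that the Makefile compiles in via the `commons/*.scala` wildcard but that is not part of this revision. The sketch below is only a guess at what the Collections helpers might look like, inferred from how PlotHist calls them; the real commons code may differ, and `using`, `TextReader`, and `run` (from Control and Io) would similarly be thin resource-management and process-spawning wrappers.

    // Hypothetical reconstruction of the commons.Collections helpers used by
    // PlotHist.scala; the actual implementations are not in this commit.
    object CollectionsSketch {
      // Chop a line sequence into chunks, starting a new chunk at every line
      // for which isHeader is true (e.g. lines starting with "config: ").
      // Leading lines before the first header are kept as their own chunk
      // here; the real helper may instead drop them.
      def groupByHeaders(lines: Seq[String])(isHeader: String => Boolean): Seq[Seq[String]] =
        lines.foldLeft(List.empty[List[String]]) {
          case (acc, line) if isHeader(line) => List(line) :: acc
          case (Nil, line)                   => List(List(line))
          case (cur :: rest, line)           => (line :: cur) :: rest
        }.map(_.reverse).reverse

      // Split each chunk into its header line and the remaining body lines.
      def separateHeads(chunks: Seq[Seq[String]]): Seq[(String, Seq[String])] =
        chunks.map(c => (c.head, c.tail))

      // Collect (key, value) pairs into a map from each key to all its values.
      def multimap[A, B](pairs: Iterable[(A, B)]): Map[A, Iterable[B]] =
        pairs.groupBy(_._1).map { case (k, vs) => (k, vs.map(_._2)) }

      // Arithmetic mean of the measurements for one core.
      def mean(xs: Seq[Int]): Double = xs.sum.toDouble / xs.size

      // "ncores 16 size 100000000 ..." -> "ncores-16-size-100000000-..."
      def spacedToHyphen(s: String): String = s.replace(' ', '-')
    }

Judging by the egrep filter in plot-hist.bash and the parsing above, the expected stdin is blocks of the form `config: ncores 16 size 100000000 ...` followed by one `<core>: <time>` line per measurement, which is also consistent with the graph filenames cited in analysis.txt below.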
From: <yan...@us...> - 2008-02-15 01:44:16
Revision: 418
          http://assorted.svn.sourceforge.net/assorted/?rev=418&view=rev
Author:   yangzhang
Date:     2008-02-14 17:44:23 -0800 (Thu, 14 Feb 2008)

Log Message:
-----------
added analysis and publishing makefile

Added Paths:
-----------
    numa-bench/trunk/doc/
    numa-bench/trunk/doc/Makefile
    numa-bench/trunk/doc/analysis.txt

Added: numa-bench/trunk/doc/Makefile
===================================================================
--- numa-bench/trunk/doc/Makefile                               (rev 0)
+++ numa-bench/trunk/doc/Makefile       2008-02-15 01:44:23 UTC (rev 418)
@@ -0,0 +1,24 @@
+PROJECT := numa-bench
+WEBDIR := assorted/htdocs/$(PROJECT)
+HTMLFRAG := ../../../assorted-site/trunk
+PANDOC = pandoc -s -S --tab-stop=2 -c ../main.css -H $(HTMLFRAG)/header.html -A $(HTMLFRAG)/google-footer.html -o $@ $^
+
+all: index.html analysis.html
+
+index.html: ../README
+        $(PANDOC)
+
+analysis.html: analysis.txt
+        $(PANDOC)
+
+publish: analysis.html index.html
+        ssh shell-sf mkdir -p $(WEBDIR)/graphs/
+        scp $^ shell-sf:$(WEBDIR)/
+
+publish-data: ../tools/graphs/*.pdf
+        scp $^ shell-sf:$(WEBDIR)/graphs/
+
+clean:
+        rm -f index.html analysis.html
+
+.PHONY: clean publish publish-data

Added: numa-bench/trunk/doc/analysis.txt
===================================================================
--- numa-bench/trunk/doc/analysis.txt                           (rev 0)
+++ numa-bench/trunk/doc/analysis.txt   2008-02-15 01:44:23 UTC (rev 418)
@@ -0,0 +1,68 @@
+% NUMA Benchmarks Analysis
+% Yang Zhang
+
+The [graphs](graphs) show the results of running several different experiments. The
+results are averaged across three trials for each experiment. The experiments
+varied the following parameters:
+
+- number of threads (CPUs, 1-16, usually 16 if not testing scalability)
+- size of the memory buffer to operate on (10MB, 100MB, or 1GB)
+- number of times to repeat the operation (usually one)
+- whether to chew through the memory sequentially or using random access
+- whether to run operations in parallel on all the CPUs
+- whether to explicitly pin the threads to a CPU (usually we do)
+- whether to operate on a global buffer or on our own buffer (that we allocate
+  ourselves) or on buffers that all other nodes allocated (for
+  cross-communication)
+- whether to perform writes to the buffer, otherwise just read
+
+Here are some questions these results help answer:
+
+- How much does working from another node affect throughput?
+  - It doesn't make much difference for sequential scans - this shows hardware
+    prefetching (and caching) at work. It still makes [a bit of
+    difference](graphs/ncores-16-size-100000000-nreps-1-shuffle-0-par-0-pin-1-local-0-write-1-cross-0.pdf).
+  - However, for random accesses, the difference is much more
+    [pronounced](graphs/ncores-16-size-100000000-nreps-1-shuffle-1-par-0-pin-1-local-0-write-1-cross-0.pdf).
+- How much difference is there between sequential scan and random access?
+  - Substantial difference. Also magnifies NUMA effects. Compare
+    [a](graphs/ncores-16-size-100000000-nreps-1-shuffle-0-par-0-pin-1-local-0-write-1-cross-0.pdf)
+    and
+    [b](graphs/ncores-16-size-100000000-nreps-1-shuffle-1-par-0-pin-1-local-0-write-1-cross-0.pdf)
+- Read vs. write
+  - Substantial difference. Random writes are ~2x slower than random reads.
+  - Compare
+    [a](graphs/ncores-16-size-1000000000-nreps-1-shuffle-0-par-0-pin-1-local-0-write-0-cross-0.pdf)
+    and
+    [b](graphs/ncores-16-size-1000000000-nreps-1-shuffle-0-par-0-pin-1-local-0-write-1-cross-0.pdf)
+- Does `malloc` tend to allocate locally?
+  - Yes, because working with memory allocated from the current thread shows
+    improved times.
+- Scalability of: cross-node memory writes vs. shared memory writes vs. local node memory writes
+  - Graphs for each of these:
+    [a](graphs/scaling-size-10000000-nreps-1-shuffle-0-par-1-pin-1-local-0-write-1-cross-1.pdf)
+    vs.
+    [b](graphs/scaling-size-10000000-nreps-1-shuffle-0-par-1-pin-1-local-0-write-1-cross-0.pdf)
+    vs.
+    [c](graphs/scaling-size-10000000-nreps-1-shuffle-0-par-1-pin-1-local-1-write-1-cross-0.pdf)
+  - Local memory node access is best but still has problems scaling. The time
+    remains constant after some point. This is probably because increasing the
+    number of cores causes the load distribution to approach a more uniform
+    distribution.
+- Scalability of: cross-node memory reads vs. shared memory reads vs. local node memory reads
+  - Graphs for each of these:
+    [a](graphs/scaling-size-10000000-nreps-1-shuffle-0-par-1-pin-1-local-0-write-0-cross-1.pdf)
+    vs.
+    [b](graphs/scaling-size-10000000-nreps-1-shuffle-0-par-1-pin-1-local-0-write-0-cross-0.pdf)
+    vs.
+    [c](graphs/scaling-size-10000000-nreps-1-shuffle-0-par-1-pin-1-local-1-write-0-cross-0.pdf)
+  - Cross-communicating performs worse, and local memory node access performs
+    the same as shared memory access. This is expected, since we aren't
+    performing writes, so the data is freely replicated to all caches (same
+    reason that there is little difference between the non-parallel reads from
+    local vs. remote).
+
+There's still quite a bit of room to fill out this test suite. For instance,
+the experiments varying the number of cores all exercise the fewest number of
+chips; the results may be quite different for tests that distribute the loaded
+cores across all chips.
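[Editor's note] As a cross-reference for the scaling-/speedup- graphs cited above: PlotHist.scala derives speedup as the time of the first (fewest-cores) point divided by each point's time. Here is a tiny standalone sketch of that calculation; the timings in it are made up purely to illustrate a curve that flattens out, and are not benchmark data.

    // Speedup relative to the fewest-cores measurement, mirroring the
    // base.toDouble / b expression in PlotHist.scala.
    object SpeedupSketch {
      def speedups(points: Seq[(Int, Double)]): Seq[(Int, Double)] = {
        val (_, base) = points.head   // time at the smallest core count
        points.map { case (ncores, t) => (ncores, base / t) }
      }

      def main(args: Array[String]): Unit = {
        // Illustrative timings only (ms), not measured data: the time stops
        // improving past ~4 cores, so speedup plateaus around 3.2x.
        val sample = Seq((1, 800.0), (2, 430.0), (4, 260.0), (8, 250.0), (16, 250.0))
        speedups(sample).foreach(println)
      }
    }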
From: <yan...@us...> - 2008-02-29 16:34:13
Revision: 545
          http://assorted.svn.sourceforge.net/assorted/?rev=545&view=rev
Author:   yangzhang
Date:     2008-02-29 08:34:11 -0800 (Fri, 29 Feb 2008)

Log Message:
-----------
added publisher

Added Paths:
-----------
    numa-bench/trunk/publish.bash

Removed Paths:
-------------
    numa-bench/trunk/doc/Makefile

Deleted: numa-bench/trunk/doc/Makefile
===================================================================
--- numa-bench/trunk/doc/Makefile       2008-02-29 16:30:30 UTC (rev 544)
+++ numa-bench/trunk/doc/Makefile       2008-02-29 16:34:11 UTC (rev 545)
@@ -1,24 +0,0 @@
-PROJECT := numa-bench
-WEBDIR := assorted/htdocs/$(PROJECT)
-HTMLFRAG := ../../../assorted-site/trunk
-PANDOC = pandoc -s -S --tab-stop=2 -c ../main.css -H $(HTMLFRAG)/header.html -A $(HTMLFRAG)/google-footer.html -o $@ $^
-
-all: index.html analysis.html
-
-index.html: ../README
-        $(PANDOC)
-
-analysis.html: analysis.txt
-        $(PANDOC)
-
-publish: analysis.html index.html
-        ssh shell-sf mkdir -p $(WEBDIR)/graphs/
-        scp $^ shell-sf:$(WEBDIR)/
-
-publish-data: ../tools/graphs/*.pdf
-        scp $^ shell-sf:$(WEBDIR)/graphs/
-
-clean:
-        rm -f index.html analysis.html
-
-.PHONY: clean publish publish-data

Added: numa-bench/trunk/publish.bash
===================================================================
--- numa-bench/trunk/publish.bash                               (rev 0)
+++ numa-bench/trunk/publish.bash       2008-02-29 16:34:11 UTC (rev 545)
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+project=numa-bench
+clean=false
+websrcs=( README doc/analysis.txt )
+webfiles=( graphs:tools/graphs/*.pdf )
+. assorted.bash "$@"

Property changes on: numa-bench/trunk/publish.bash
___________________________________________________________________
Name: svn:executable
   + *