|
From: Michael D. <md...@st...> - 2013-10-10 14:23:18
|
Thanks. This is much more helpful. What we need, however, is a "self-contained, standalone example". The code below calls functions that are not present. See http://sscce.org/ for why this is so important. Again, I would have to guess what those functions do -- it may be relevant, it may not. If I have something that I can *just run* then I can use various introspection tools to see what is going wrong. Mike On 10/10/2013 10:12 AM, Martin MOKREJŠ wrote: > Michael Droettboom wrote: >> Can you provide a complete, standalone example that reproduces the >> problem? Otherwise all I can do is guess. >> >> The usual culprit is forgetting to close figures after you're done with >> them. > Thanks, I learned that when matplotlib-1.3.0 spat out a warning message at me some weeks > ago. Yes, I do call _figure.clear() and pylab.clf() but only after the savefig() returns, which > is not the case here. I also use gc.collect() a lot throughout the code, especially before and after > I draw every figure. That is not enough here. > > > > > > from itertools import izip, imap, ifilter > import pylab > import matplotlib > # Force matplotlib not to use any X-windows backend. 
> matplotlib.use('Agg') > import pylab > > F = pylab.gcf() > > # convert the view of numpy array to tuple > # http://matplotlib.1069221.n5.nabble.com/RendererAgg-int-width-int-height-dpi-debug-False-ValueError-width-and-height-must-each-be-below-32768-td27756.html > DefaultSize = tuple(F.get_size_inches()) > > > > def draw_hist2d_plot(filename, mydata_x, mydata_y, colors, title_data, xlabel_data, ylabel_data, legends, legend_loc='upper right', legend_bbox_to_anchor=(1.0, 1.0), legend_ncol=None, xmin=None, xmax=None, ymin=None, ymax=None, fontsize=10, legend_fontsize=8, dpi=100, tight_layout=False, legend_inside=False, objsize=0.1): > # hist2d(x, y, bins = None, range=None, weights=None, cmin=None, cmax=None **kwargs) > > if len(mydata_x) != len(mydata_y): > raise ValueError, "%s: len(mydata_x) != len(mydata_y): %s != %s" % (filename, len(mydata_x), len(mydata_y)) > > if colors and len(mydata_x) != len(colors): > sys.stderr.write("Warning: draw_hist2d_plot(): %s: len(mydata_x) != len(colors): %s != %s.\n" % (filename, len(mydata_x), len(colors))) > > if colors and legends and len(colors) != len(legends): > sys.stderr.write("Warning: draw_hist2d_plot(): %s, len(colors) != len(legends): %s != %s.\n" % (filename, len(colors), len(legends))) > > if mydata_x and mydata_y and filename: > if legends: > if not legend_ncol: > _subfigs, _ax1_num, _ax2_num, _legend_ncol = get_ncol(legends, fontsize=legend_fontsize) > else: > _subfigs, _ax1_num, _ax2_num, _legend_ncol = 3, 213, 313, legend_ncol > else: > _subfigs, _ax1_num, _legend_ncol = 3, 313, 0 > > set_my_pylab_defaults() > pylab.clf() > _figure = pylab.figure() > _figure.clear() > _figure.set_tight_layout(True) > gc.collect() > > if legends: > # do not crash on too tall figures > if 8.4 * _subfigs < 200: > _figure.set_size_inches(11.2, 8.4 * (_subfigs + 1)) > else: > # _figure.set_size_inches() silently accepts a large value but later on _figure.savefig() crashes with: > # ValueError: width and height must each be below 
32768 > _figure.set_size_inches(11.2, 200) > sys.stderr.write("Warning: draw_hist2d_plot(): Wanted to set %s figure height to %s but is too high, forcing %s instead. You will likely get an incomplete image.\n" % (filename, 8.4 * _subfigs, 200)) > if myoptions.debug > 5: print "Debug: draw_hist2d_plot(): Changed %s figure size to: %s" % (filename, str(_figure.get_size_inches())) > _ax1 = _figure.add_subplot(_ax1_num) > _ax2 = _figure.add_subplot(_ax2_num) > else: > _figure.set_size_inches(11.2, 8.4 * 2) > _ax1 = _figure.gca() > if myoptions.debug > 5: print "Debug: draw_hist2d_plot(): Changed %s figure size to: %s" % (filename, str(_figure.get_size_inches())) > > _series = [] > #for _x, _y, _c, _l in izip(mydata_x, mydata_y, colors, legends): > for _x, _y, _c in izip(mydata_x, mydata_y, colors): > # _Line2D = _ax1.plot(_x, _y) # returns Line2D object > _my_PathCollection = _ax1.scatter(_x, _y, color=_c, s=objsize) # , label=_l) # returns PathCollection object > _series.append(_my_PathCollection) > > if legends: > #for _x, _y, _c, _l in izip(mydata_x, mydata_y, colors, legends): > for _x, _y, _c in izip(mydata_x, mydata_y, colors): > _my_PathCollection = _ax1.scatter(_x, _y, color=_c, s=objsize) # , label=_l) > _series.append(_my_PathCollection) > > _ax2.legend(_series, legends, loc='upper left', bbox_to_anchor=(0,0,1,1), borderaxespad=0., ncol=_legend_ncol, mode='expand', fontsize=legend_fontsize) > _ax2.set_frame_on(False) > _ax2.tick_params(bottom='off', left='off', right='off', top='off') > pylab.setp(_ax2.get_yticklabels(), visible=False) > pylab.setp(_ax2.get_xticklabels(), visible=False) > else: > for _x, _y, _c in izip(mydata_x, mydata_y, colors): > _ax1.scatter(_x, _y, color=_c, s=objsize) #, marker='^') # keeps eating memory in: > # > # draw_hist2d_plot(filename, _data_xrow, _data_yrow, _my_colors, _title, _xlabel, _ylabel, [], xmin=None, xmax=None, ymin=None, ymax=None, fontsize=10, dpi=100) > # File "/blah.py", line 14080, in draw_hist2d_plot > # 
_ax1.scatter(_x, _y, color=_c, s=objsize) #, marker='^') > # File "/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 6247, in scatter > # self._process_unit_info(xdata=x, ydata=y, kwargs=kwargs) > # File "/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 1685, in _process_unit_info > # self.xaxis.update_units(xdata) > # File "/usr/lib64/python2.7/site-packages/matplotlib/axis.py", line 1332, in update_units > # converter = munits.registry.get_converter(data) > > # pylab.subplots_adjust(left = (5/25.4)/_figure.xsize, bottom = (4/25.4)/_figure.ysize, right = 1 - (1/25.4)/_figure.xsize, top = 1 - (3/25.4)/_figure.ysize) > > _ax1.set_xlabel(xlabel_data, fontsize=fontsize) > _ax1.set_ylabel(ylabel_data, fontsize=fontsize) > _ax1.set_xmargin(0.05) > _ax1.set_ymargin(0.05) > _ax1.set_autoscale_on(False) > > > set_limits(_ax1, xmin, xmax, ymin, ymax) > > if fontsize == 10: > _ax1.set_title('\n'.join(wrap(title_data, 100)), fontsize=fontsize+2) > elif fontsize == 12: > _ax1.set_title('\n'.join(wrap(title_data, 90)), fontsize=fontsize+2) > else: > _ax1.set_title('\n'.join(wrap(title_data, 100)), fontsize=fontsize+2) > > if legends: > _figure.savefig(filename, dpi=100) #, bbox_inches='tight') > del(_my_PathCollection) > del(_ax2) > else: > _figure.savefig(filename, dpi=100) > > del(_series) > del(_ax1) > _figure.clear() > del(_figure) > pylab.clf() > pylab.close() > # pylab.rcdefaults() > > gc.collect() > > > > That's the whole function. I used to suspect _ax1.scatter() in the past but probably > only because I hit the memory problems earlier. That is worked around now by using > on disk bsddb3 file or gdbm somewhere upstream. This particular function is nevertheless > fed with just a huge list numbers, and that is not the issue in itself. 
> > I would be glad if I could tell matplotlib: Here you have 100 colors, use them for all data > as you wish, just spread them evenly over the whole dataset so that the first 1/100th of the data > gets the first color, the second 1/100th of the data gets the second color, and so on. Optionally, > I would like to be able to say: use the 100 colors in cycles for all data points, just loop through > the colors as long as you need some. In both scenarios, I could have avoided the two for loops > in the above code and the necessity to generate those objects. Same for the legend stuff. > > Martin > >> Mike >> >> On 10/10/2013 09:05 AM, Martin MOKREJŠ wrote: >>> Hi, >>> rendering some of my charts takes almost 50GB of RAM. I believe below is a stack trace >>> of one such situation when it had already taken 15GB. Would somebody comment on what >>> matplotlib is doing at that very moment? Why the recursion? >>> >>> The charts had to have 262422 data points in a 2D scatter plot, each point is assigned >>> its own color. They are in batches so that there are 153 distinct colors but nevertheless, >>> I assigned to each data point a color value. There are 153 legend items also (one color >>> won't be used). >>> >>> ^CTraceback (most recent call last): >>> ... 
>>> _figure.savefig(filename, dpi=100) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/figure.py", line 1421, in savefig >>> self.canvas.print_figure(*args, **kwargs) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/backend_bases.py", line 2220, in print_figure >>> **kwargs) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/backends/backend_agg.py", line 505, in print_png >>> FigureCanvasAgg.draw(self) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/backends/backend_agg.py", line 451, in draw >>> self.figure.draw(self.renderer) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, in draw_wrapper >>> draw(artist, renderer, *args, **kwargs) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/figure.py", line 1034, in draw >>> func(*args) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, in draw_wrapper >>> draw(artist, renderer, *args, **kwargs) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 2086, in draw >>> a.draw(renderer) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, in draw_wrapper >>> draw(artist, renderer, *args, **kwargs) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line 718, in draw >>> return Collection.draw(self, renderer) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, in draw_wrapper >>> draw(artist, renderer, *args, **kwargs) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line 276, in draw >>> offsets, transOffset, self.get_facecolor(), self.get_edgecolor(), >>> File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line 551, in get_edgecolor >>> return self._edgecolors >>> KeyboardInterrupt >>> ^CError in atexit._run_exitfuncs: >>> Traceback (most recent call last): >>> File "/usr/lib64/python2.7/atexit.py", line 24, in _run_exitfuncs >>> func(*targs, **kargs) >>> File 
"/usr/lib64/python2.7/site-packages/matplotlib/_pylab_helpers.py", line 90, in destroy_all >>> gc.collect() >>> KeyboardInterrupt >>> Error in sys.exitfunc: >>> Traceback (most recent call last): >>> File "/usr/lib64/python2.7/atexit.py", line 24, in _run_exitfuncs >>> func(*targs, **kargs) >>> File "/usr/lib64/python2.7/site-packages/matplotlib/_pylab_helpers.py", line 90, in destroy_all >>> gc.collect() >>> KeyboardInterrupt >>> >>> ^C >>> >>> >>> Clues what is the code doing? I use mpl-1.3.0. >>> Thank you, >>> Martin >>> >>> ------------------------------------------------------------------------------ >>> October Webinars: Code for Performance >>> Free Intel webinars can help you accelerate application performance. >>> Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from >>> the latest Intel processors and coprocessors. See abstracts and register > >>> http://pubads.g.doubleclick.net/gampad/clk?id=60134071&iu=/4140/ostg.clktrk >>> _______________________________________________ >>> Matplotlib-users mailing list >>> Mat...@li... >>> https://lists.sourceforge.net/lists/listinfo/matplotlib-users >> -- _ |\/|o _|_ _. _ | | \.__ __|__|_|_ _ _ ._ _ | ||(_| |(_|(/_| |_/|(_)(/_|_ |_|_)(_)(_)| | | http://www.droettboom.com |