|
From: Martin M. <mmo...@gm...> - 2013-10-10 14:11:40
|
Michael Droettboom wrote:
> Can you provide a complete, standalone example that reproduces the
> problem. Otherwise all I can do is guess.
>
> The usual culprit is forgetting to close figures after you're done with
> them.
Thanks, I learned that through matplotlib-1.3.0 give spit over me a warning message some weeks
ago. Yes, i do call _figure.clear() and pylab.clf() but only after the savefig() returns, which
is not the case here. Also use gc.collect() a lot through the code, especially before and after
I draw every figure. That is not enough here.
from itertools import izip, imap, ifilter
import pylab
import matplotlib
# Force matplotlib not to use any X-windows backend.
matplotlib.use('Agg')
import pylab
F = pylab.gcf()
# convert the view of numpy array to tuple
# http://matplotlib.1069221.n5.nabble.com/RendererAgg-int-width-int-height-dpi-debug-False-ValueError-width-and-height-must-each-be-below-32768-td27756.html
DefaultSize = tuple(F.get_size_inches())
def draw_hist2d_plot(filename, mydata_x, mydata_y, colors, title_data, xlabel_data, ylabel_data, legends, legend_loc='upper right', legend_bbox_to_anchor=(1.0, 1.0), legend_ncol=None, xmin=None, xmax=None, ymin=None, ymax=None, fontsize=10, legend_fontsize=8, dpi=100, tight_layout=False, legend_inside=False, objsize=0.1):
# hist2d(x, y, bins = None, range=None, weights=None, cmin=None, cmax=None **kwargs)
if len(mydata_x) != len(mydata_y):
raise ValueError, "%s: len(mydata_x) != len(mydata_y): %s != %s" % (filename, len(mydata_x), len(mydata_y))
if colors and len(mydata_x) != len(colors):
sys.stderr.write("Warning: draw_hist2d_plot(): %s: len(mydata_x) != len(colors): %s != %s.\n" % (filename, len(mydata_x), len(colors)))
if colors and legends and len(colors) != len(legends):
sys.stderr.write("Warning: draw_hist2d_plot(): %s, len(colors) != len(legends): %s != %s.\n" % (filename, len(colors), len(legends)))
if mydata_x and mydata_y and filename:
if legends:
if not legend_ncol:
_subfigs, _ax1_num, _ax2_num, _legend_ncol = get_ncol(legends, fontsize=legend_fontsize)
else:
_subfigs, _ax1_num, _ax2_num, _legend_ncol = 3, 213, 313, legend_ncol
else:
_subfigs, _ax1_num, _legend_ncol = 3, 313, 0
set_my_pylab_defaults()
pylab.clf()
_figure = pylab.figure()
_figure.clear()
_figure.set_tight_layout(True)
gc.collect()
if legends:
# do not crash on too tall figures
if 8.4 * _subfigs < 200:
_figure.set_size_inches(11.2, 8.4 * (_subfigs + 1))
else:
# _figure.set_size_inches() silently accepts a large value but later on _figure.savefig() crashes with:
# ValueError: width and height must each be below 32768
_figure.set_size_inches(11.2, 200)
sys.stderr.write("Warning: draw_hist2d_plot(): Wanted to set %s figure height to %s but is too high, forcing %s instead. You will likely get an incomplete image.\n" % (filename, 8.4 * _subfigs, 200))
if myoptions.debug > 5: print "Debug: draw_hist2d_plot(): Changed %s figure size to: %s" % (filename, str(_figure.get_size_inches()))
_ax1 = _figure.add_subplot(_ax1_num)
_ax2 = _figure.add_subplot(_ax2_num)
else:
_figure.set_size_inches(11.2, 8.4 * 2)
_ax1 = _figure.gca()
if myoptions.debug > 5: print "Debug: draw_hist2d_plot(): Changed %s figure size to: %s" % (filename, str(_figure.get_size_inches()))
_series = []
#for _x, _y, _c, _l in izip(mydata_x, mydata_y, colors, legends):
for _x, _y, _c in izip(mydata_x, mydata_y, colors):
# _Line2D = _ax1.plot(_x, _y) # returns Line2D object
_my_PathCollection = _ax1.scatter(_x, _y, color=_c, s=objsize) # , label=_l) # returns PathCollection object
_series.append(_my_PathCollection)
if legends:
#for _x, _y, _c, _l in izip(mydata_x, mydata_y, colors, legends):
for _x, _y, _c in izip(mydata_x, mydata_y, colors):
_my_PathCollection = _ax1.scatter(_x, _y, color=_c, s=objsize) # , label=_l)
_series.append(_my_PathCollection)
_ax2.legend(_series, legends, loc='upper left', bbox_to_anchor=(0,0,1,1), borderaxespad=0., ncol=_legend_ncol, mode='expand', fontsize=legend_fontsize)
_ax2.set_frame_on(False)
_ax2.tick_params(bottom='off', left='off', right='off', top='off')
pylab.setp(_ax2.get_yticklabels(), visible=False)
pylab.setp(_ax2.get_xticklabels(), visible=False)
else:
for _x, _y, _c in izip(mydata_x, mydata_y, colors):
_ax1.scatter(_x, _y, color=_c, s=objsize) #, marker='^') # keeps eating memory in:
#
# draw_hist2d_plot(filename, _data_xrow, _data_yrow, _my_colors, _title, _xlabel, _ylabel, [], xmin=None, xmax=None, ymin=None, ymax=None, fontsize=10, dpi=100)
# File "/blah.py", line 14080, in draw_hist2d_plot
# _ax1.scatter(_x, _y, color=_c, s=objsize) #, marker='^')
# File "/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 6247, in scatter
# self._process_unit_info(xdata=x, ydata=y, kwargs=kwargs)
# File "/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 1685, in _process_unit_info
# self.xaxis.update_units(xdata)
# File "/usr/lib64/python2.7/site-packages/matplotlib/axis.py", line 1332, in update_units
# converter = munits.registry.get_converter(data)
# pylab.subplots_adjust(left = (5/25.4)/_figure.xsize, bottom = (4/25.4)/_figure.ysize, right = 1 - (1/25.4)/_figure.xsize, top = 1 - (3/25.4)/_figure.ysize)
_ax1.set_xlabel(xlabel_data, fontsize=fontsize)
_ax1.set_ylabel(ylabel_data, fontsize=fontsize)
_ax1.set_xmargin(0.05)
_ax1.set_ymargin(0.05)
_ax1.set_autoscale_on(False)
set_limits(_ax1, xmin, xmax, ymin, ymax)
if fontsize == 10:
_ax1.set_title('\n'.join(wrap(title_data, 100)), fontsize=fontsize+2)
elif fontsize == 12:
_ax1.set_title('\n'.join(wrap(title_data, 90)), fontsize=fontsize+2)
else:
_ax1.set_title('\n'.join(wrap(title_data, 100)), fontsize=fontsize+2)
if legends:
_figure.savefig(filename, dpi=100) #, bbox_inches='tight')
del(_my_PathCollection)
del(_ax2)
else:
_figure.savefig(filename, dpi=100)
del(_series)
del(_ax1)
_figure.clear()
del(_figure)
pylab.clf()
pylab.close()
# pylab.rcdefaults()
gc.collect()
That's the whole function. I used to suspect _ax1.scatter() in the past but probably
only because I hit the memory problems earlier. That is worked around now by using
on disk bsddb3 file or gdbm somewhere upstream. This particular function is nevertheless
fed with just a huge list numbers, and that is not the issue in itself.
I would be glad if I could tell matplotlib: Here you have 100 colors, use them for all data
as you wish, just spread them evenly over the whole dataset so that first 1/100th of the data
gets the first color, second 1/100th of the data gets the second color, and so on. Optionally,
if you would like to say: use the 100 colors in cycles for all data points, just loop through
the colors as long as you need some. In both scenarios, I could have avoided the two for loops
in the above code and necessity to generate those objects. Same for legend stuff.
Martin
>
> Mike
>
> On 10/10/2013 09:05 AM, Martin MOKREJŠ wrote:
>> Hi,
>> rendering some of my charts takes almost 50GB of RAM. I believe below is a stracktrace
>> of one such situation when it already took 15GB. Would somebody comments on what is
>> matplotlib doing at the very moment? Why the recursion?
>>
>> The charts had to have 262422 data points in a 2D scatter plot, each point has assigned
>> its own color. They are in batches so that there are 153 distinct colors but nevertheless,
>> I assigned to each data point a color value. There are 153 legend items also (one color
>> won't be used).
>>
>> ^CTraceback (most recent call last):
>> ...
>> _figure.savefig(filename, dpi=100)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/figure.py", line 1421, in savefig
>> self.canvas.print_figure(*args, **kwargs)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/backend_bases.py", line 2220, in print_figure
>> **kwargs)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/backends/backend_agg.py", line 505, in print_png
>> FigureCanvasAgg.draw(self)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/backends/backend_agg.py", line 451, in draw
>> self.figure.draw(self.renderer)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, in draw_wrapper
>> draw(artist, renderer, *args, **kwargs)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/figure.py", line 1034, in draw
>> func(*args)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, in draw_wrapper
>> draw(artist, renderer, *args, **kwargs)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 2086, in draw
>> a.draw(renderer)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, in draw_wrapper
>> draw(artist, renderer, *args, **kwargs)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line 718, in draw
>> return Collection.draw(self, renderer)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, in draw_wrapper
>> draw(artist, renderer, *args, **kwargs)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line 276, in draw
>> offsets, transOffset, self.get_facecolor(), self.get_edgecolor(),
>> File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line 551, in get_edgecolor
>> return self._edgecolors
>> KeyboardInterrupt
>> ^CError in atexit._run_exitfuncs:
>> Traceback (most recent call last):
>> File "/usr/lib64/python2.7/atexit.py", line 24, in _run_exitfuncs
>> func(*targs, **kargs)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/_pylab_helpers.py", line 90, in destroy_all
>> gc.collect()
>> KeyboardInterrupt
>> Error in sys.exitfunc:
>> Traceback (most recent call last):
>> File "/usr/lib64/python2.7/atexit.py", line 24, in _run_exitfuncs
>> func(*targs, **kargs)
>> File "/usr/lib64/python2.7/site-packages/matplotlib/_pylab_helpers.py", line 90, in destroy_all
>> gc.collect()
>> KeyboardInterrupt
>>
>> ^C
>>
>>
>> Clues what is the code doing? I use mpl-1.3.0.
>> Thank you,
>> Martin
>>
>> ------------------------------------------------------------------------------
>> October Webinars: Code for Performance
>> Free Intel webinars can help you accelerate application performance.
>> Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from
>> the latest Intel processors and coprocessors. See abstracts and register >
>> http://pubads.g.doubleclick.net/gampad/clk?id=60134071&iu=/4140/ostg.clktrk
>> _______________________________________________
>> Matplotlib-users mailing list
>> Mat...@li...
>> https://lists.sourceforge.net/lists/listinfo/matplotlib-users
>
>
--
Martin Mokrejs, Ph.D.
Bioinformatics
Donovalska 1658
149 00 Prague
Czech Republic
http://www.iresite.org
http://www.iresite.org/~mmokrejs
|