|
From: <as...@us...> - 2010-02-12 02:21:15
|
Revision: 8127
http://matplotlib.svn.sourceforge.net/matplotlib/?rev=8127&view=rev
Author: astraw
Date: 2010-02-12 02:21:05 +0000 (Fri, 12 Feb 2010)
Log Message:
-----------
Add option to bootstrap confidence intervals for boxplot (Paul Hobson)
Modified Paths:
--------------
trunk/matplotlib/CHANGELOG
trunk/matplotlib/lib/matplotlib/axes.py
Added Paths:
-----------
trunk/matplotlib/examples/pylab_examples/boxplot_demo3.py
Modified: trunk/matplotlib/CHANGELOG
===================================================================
--- trunk/matplotlib/CHANGELOG 2010-02-11 13:15:28 UTC (rev 8126)
+++ trunk/matplotlib/CHANGELOG 2010-02-12 02:21:05 UTC (rev 8127)
@@ -1,3 +1,7 @@
+2010-02-11 Added 'bootstrap' option to boxplot. This allows bootstrap
+ estimates of median confidence intervals. Based on an
+ initial patch by Paul Hobson. - ADS
+
2010-02-06 Added setup.cfg "basedirlist" option to override setting
in setupext.py "basedir" dictionary; added "gnu0"
platform requested by Benjamin Drung. - EF
Added: trunk/matplotlib/examples/pylab_examples/boxplot_demo3.py
===================================================================
--- trunk/matplotlib/examples/pylab_examples/boxplot_demo3.py (rev 0)
+++ trunk/matplotlib/examples/pylab_examples/boxplot_demo3.py 2010-02-12 02:21:05 UTC (rev 8127)
@@ -0,0 +1,27 @@
+import matplotlib.pyplot as plt
+import matplotlib.transforms as mtransforms
+import numpy as np
+
+np.random.seed(2)
+inc = 0.1
+e1 = np.random.uniform(0,1, size=(500,))
+e2 = np.random.uniform(0,1, size=(500,))
+e3 = np.random.uniform(0,1 + inc, size=(500,))
+e4 = np.random.uniform(0,1 + 2*inc, size=(500,))
+
+treatments = [e1,e2,e3,e4]
+
+fig = plt.figure()
+ax = fig.add_subplot(111)
+pos = np.array(range(len(treatments)))+1
+bp = ax.boxplot( treatments, sym='k+', patch_artist=True,
+ positions=pos, notch=1, bootstrap=5000 )
+text_transform= mtransforms.blended_transform_factory(ax.transData,
+ ax.transAxes)
+ax.set_xlabel('treatment')
+ax.set_ylabel('response')
+ax.set_ylim(-0.2, 1.4)
+plt.setp(bp['whiskers'], color='k', linestyle='-' )
+plt.setp(bp['fliers'], markersize=3.0)
+fig.subplots_adjust(right=0.99,top=0.99)
+plt.show()
Modified: trunk/matplotlib/lib/matplotlib/axes.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/axes.py 2010-02-11 13:15:28 UTC (rev 8126)
+++ trunk/matplotlib/lib/matplotlib/axes.py 2010-02-12 02:21:05 UTC (rev 8127)
@@ -4881,7 +4881,8 @@
return (l0, caplines, barcols)
def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
- positions=None, widths=None, patch_artist=False):
+ positions=None, widths=None, patch_artist=False,
+ bootstrap=None):
"""
call signature::
@@ -4910,6 +4911,16 @@
a function of the inner quartile range. They extend to the
most extreme data point within ( ``whis*(75%-25%)`` ) data range.
+ *bootstrap* (default None) specifies whether to bootstrap the
+ confidence intervals around the median for notched
+ boxplots. If bootstrap==None, no bootstrapping is performed,
+ and notches are calculated using a Gaussian-based asymptotic
+ approximation (see McGill, R., Tukey, J.W., and Larsen, W.A.,
+ 1978, and Kendall and Stuart, 1967). Otherwise, bootstrap
+ specifies the number of times to bootstrap the median to
+ determine its 95% confidence intervals. Values between 1000
+ and 10000 are recommended.
+
*positions* (default 1,2,...,n) sets the horizontal positions of
the boxes. The ticks and limits are automatically set to match
the positions.
@@ -5021,8 +5032,33 @@
med_x = [box_x_min, box_x_max]
# calculate 'notch' plot
else:
- notch_max = med + 1.57*iq/np.sqrt(row)
- notch_min = med - 1.57*iq/np.sqrt(row)
+ if bootstrap is not None:
+ # Do a bootstrap estimate of notch locations.
+ def bootstrapMedian(data, N=5000):
+ # determine 95% confidence intervals of the median
+ M = len(data)
+ percentile = [2.5,97.5]
+ estimate = np.zeros(N)
+ for n in range(N):
+ bsIndex = np.random.random_integers(0,M-1,M)
+ bsData = data[bsIndex]
+ estimate[n] = mlab.prctile(bsData, 50)
+ CI = mlab.prctile(estimate, percentile)
+ return CI
+
+ # get conf. intervals around median
+ CI = bootstrapMedian(d, N=bootstrap)
+ notch_max = CI[1]
+ notch_min = CI[0]
+ else:
+ # Estimate notch locations using Gaussian-based
+ # asymptotic approximation.
+ #
+ # For discussion: McGill, R., Tukey, J.W.,
+ # and Larsen, W.A. (1978) "Variations of
+ # Boxplots", The American Statistician, 32:12-16.
+ notch_max = med + 1.57*iq/np.sqrt(row)
+ notch_min = med - 1.57*iq/np.sqrt(row)
# make our notched box vectors
box_x = [box_x_min, box_x_max, box_x_max, cap_x_max, box_x_max,
box_x_max, box_x_min, box_x_min, cap_x_min, box_x_min,
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|