from numarray import *
####################
# Quantile
####################
def quantile (x, probs = (0, 0.25, 0.5, 0.75, 1)):
"""quantile(x,probs) returns the quantiles corresponding to the
given probabilities. It is implemented from the R documentation,
whence most of this text is also derived.
x is a vector (1-dimensional array) whose sample quantiles are wanted
probs is any sequence with values in [0,1]. By default, it is
the extremes and quartiles: (0, 0.25, 0.5, 0.75, 1)
The smallest observation corresponds to a probability of 0 and the
largest to a probability of 1. It uses linear interpolation, in
the style of R.
It returns a vector of length *len(probs)*.
'quantile(x,p)' as a function of 'p' linearly interpolates the
points ( (i-1)/(n-1), ox[i] ), where:
'ox = sort(x)', and
'n = len(x)'
This gives 'quantile(x, p) == (1-f)*ox[i] + f*ox[i+1]', where:
'r = 1 + (n-1)*p',
'i = floor(r)',
'f = r - i', and
'ox[n] is defined to be ox[n-1]'
Therefore, if len(x) = 10, then quantile(x,(0.25)) returns
the value that would occur at ox[2.25], which is to say the
value expected one quarter of the way along the sorted vector.
"""
assert len(x.shape) == 1
assert max(probs) <= 1.0
assert min(probs) >= 0.0
n = len(x)
m = len(probs)
ox = sort(x)
ans = ones((m), Float)
for c in range(m):
p = probs[c]
r = 1 + (n-1) * p
#i = floor(r)
i = int(r)
f = r - i
j = min((i,n)) # extra parens bc MLab min() takes array,
k = min((i+1,n))# and overrides built-in min
ans[c] = (1-f)*ox[j-1] + f*ox[k-1]
# Note we used j-1 and k-1 bc Python arrays start at 0,
# while these formulas came from R, where arrays start at 1.
return ans
print quantile(array([1,2,3,4,5,6,7,8,9,10]))
print quantile(array([0.23,1.0023,1.223,1.235,5.6,9.0,23.3456,34.458,34.56,78.9]))