from numarray import * #################### # Quantile #################### def quantile (x, probs = (0, 0.25, 0.5, 0.75, 1)): """quantile(x,probs) returns the quantiles corresponding to the given probabilities. It is implemented from the R documentation, whence most of this text is also derived. x is a vector (1-dimensional array) whose sample quantiles are wanted probs is any sequence with values in [0,1]. By default, it is the extremes and quartiles: (0, 0.25, 0.5, 0.75, 1) The smallest observation corresponds to a probability of 0 and the largest to a probability of 1. It uses linear interpolation, in the style of R. It returns a vector of length len(probs). 'quantile(x,p)' as a function of 'p' linearly interpolates the points ( (i-1)/(n-1), ox[i] ), where: 'ox = sort(x)', and 'n = len(x)' This gives 'quantile(x, p) == (1-f)*ox[i] + f*ox[i+1]', where: 'r = 1 + (n-1)*p', 'i = floor(r)', 'f = r - i', and 'ox[n] is defined to be ox[n-1]' Therefore, if len(x) = 10, then quantile(x,(0.25)) returns the value that would occur at ox[2.25], which is to say the value expected one quarter of the way along the sorted vector. """ assert len(x.shape) == 1 assert max(probs) <= 1.0 assert min(probs) >= 0.0 n = len(x) m = len(probs) ox = sort(x) ans = ones((m), Float) for c in range(m): p = probs[c] r = 1 + (n-1) * p #i = floor(r) i = int(r) f = r - i j = min((i,n)) # extra parens bc MLab min() takes array, k = min((i+1,n))# and overrides built-in min ans[c] = (1-f)*ox[j-1] + f*ox[k-1] # Note we used j-1 and k-1 bc Python arrays start at 0, # while these formulas came from R, where arrays start at 1. return ans print quantile(array([1,2,3,4,5,6,7,8,9,10])) print quantile(array([0.23,1.0023,1.223,1.235,5.6,9.0,23.3456,34.458,34.56,78.9]))