In [1]:
# https://en.wikipedia.org/wiki/Percentile#Definition_of_the_Weighted_Percentile_method
In [31]:
# Example data: one weight per value (same positions in both lists).
weights = [1, 1.0, 1, 1.0]
values = [100, 150, 75, 10]

# Pair each value with its weight, order the pairs by value, then unzip the
# pairs back into two parallel tuples.
values, weights = zip(
    *sorted(
        zip(values, weights),
        key=lambda pair: pair[0],  # compare on the value only
    )
)

values, weights
Out[31]:
((10, 75, 100, 150), (1.0, 1, 1, 1.0))
In [47]:
def Sn(ws):
    """Return a function that maps n to the n-th partial sum ws[0] + ... + ws[n]."""
    def partial_sum(n):
        # The slice end is exclusive, hence n + 1 to include ws[n].
        return sum(ws[:n + 1])
    return partial_sum

# Evaluate the partial sums S_0 .. S_3 of the sorted weights:
[Sn(weights)(n) for n in range(4)]
Out[47]:
[1.0, 2.0, 3.0, 4.0]
In [43]:
# Sanity check: the last partial sum should equal the plain sum of the weights.
len(weights), sum(weights), Sn(weights)(len(weights)-1)
Out[43]:
(4, 4.0, 4.0)
In [75]:
def _pn(ws):
    """Return a function that maps n to the percent rank p_n of the n-th weight.

    p_n = 100 / S_N * (S_n - w_n / 2), where S_n is the n-th partial sum of
    the weights (computed with Sn) and S_N is the last partial sum.
    """
    def percent_rank(n):
        partial_sum = Sn(ws)
        last_index = len(ws) - 1
        return 100 / partial_sum(last_index) * (partial_sum(n) - ws[n] / 2.0)
    return percent_rank

# Percent rank p_0 .. p_3 of each sorted value:
[_pn(weights)(n) for n in range(4)]
Out[75]:
[12.5, 37.5, 62.5, 87.5]
In [76]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as p
import numpy as np
In [77]:
def v(vs, ws):
    """Build a weighted-percentile lookup using explicit linear interpolation.

    Pure-Python reference implementation of the weighted percentile method.
    The n-th value is assigned the percent rank

        p_n = 100 / S_N * (S_n - w_n / 2)

    where S_n is the n-th partial sum of the weights and S_N their total.

    vs     :: Sequence of values, ALREADY sorted in ascending order
    ws     :: Sequence of weights aligned with vs (ws[n] belongs to vs[n])
    return :: Function perc(P) that returns the value at percentile P

    Nothing is sorted here: the caller must pass the values pre-sorted, with
    the weights reordered to match. The weights must not sum to zero.
    """
    # Compute every percent rank once, using a running partial sum, instead of
    # re-summing the weights from scratch for every rank on every lookup.
    total = sum(ws)
    ranks = []
    running = 0
    for w in ws:
        running += w
        ranks.append(100 / total * (running - w / 2.0))

    def perc(P):
        # Find the first rank strictly above P; P then lies between that rank
        # and the one just before it.
        for hi, rank_hi in enumerate(ranks):
            if rank_hi > P:
                if hi == 0:
                    # P is below the lowest rank: clamp to the smallest value.
                    return vs[0]
                lo = hi - 1
                fraction = (P - ranks[lo]) / (rank_hi - ranks[lo])
                return vs[lo] + fraction * (vs[hi] - vs[lo])
        # P is at or above the highest rank: clamp to the largest value.
        return vs[-1]

    return perc

# Evaluate the explicit-loop implementation at every integer percentile 0..99
# and plot the resulting values.
wp0 = list(map(v(values, weights), range(100)))
p.figure(figsize=(8 * 1.618, 8))
p.scatter(range(100), np.array(wp0))
Out[77]:
<matplotlib.collections.PathCollection at 0x7f14bf7c7dd8>
In [78]:
def weighted_percentile(percs, vs, ws):
    """ Use linear interpolation to calculate the weighted percentile.

        Value and weight arrays are first sorted by value. The cumulative
        distribution function (cdf) is then computed, after which np.interp
        finds the two values closest to our desired weighted percentile(s)
        and linearly interpolates them. Percentiles below the first or above
        the last cdf point are clamped to the smallest / largest value.

        percs  :: Percentile(s) we want to calculate, on a 0-100 scale
        vs     :: 1-D array-like of values we are computing the percentile of
        ws     :: 1-D array-like of weights for our corresponding values
        return :: Array of values at the requested percentiles
        raises :: ValueError if vs and ws differ in shape or are empty
    """
    # Accept plain lists/tuples too: the fancy indexing below needs ndarrays.
    vs, ws = np.asarray(vs), np.asarray(ws)
    if vs.shape != ws.shape:
        raise ValueError(
            "vs and ws must have the same shape, got %s and %s"
            % (vs.shape, ws.shape)
        )
    if vs.size == 0:
        raise ValueError("vs and ws must contain at least one element")

    # Stable sort: equal values keep their input order, so the weight assigned
    # to each tied position (and therefore the result) is deterministic.
    idx = np.argsort(vs, kind="mergesort")
    vs, ws = vs[idx], ws[idx] # weights and values sorted by value
    # Each value sits at the midpoint of its own weight span, hence the
    # half-weight offset subtracted from the cumulative sum.
    cdf = 100 * (ws.cumsum() - ws / 2.0) / ws.sum()
    return np.interp(percs, cdf, vs) # linear interpolation
In [79]:
# Evaluate the np.interp-based implementation at every integer percentile
# 0..99 and plot it with labelled axes.
wp1 = weighted_percentile(
    np.arange(100),
    np.array(values),
    np.array(weights),
)
p.figure(figsize=(8 * 1.618, 8))
p.scatter(range(100), wp1)
p.xlabel("percentile")
p.ylabel("value")
Out[79]:
<matplotlib.text.Text at 0x7f14bf6b11d0>
In [80]:
# Overlay both implementations on one figure to compare them visually:
# the np.interp version (wp1) in green, then the explicit-loop version (wp0)
# in red.
p.figure(figsize=(8*1.618, 8))
p.scatter(range(100), wp1, c='green')
p.scatter(range(100), wp0, c='red')
Out[80]:
<matplotlib.collections.PathCollection at 0x7f14bf521fd0>
In [81]:
# For comparison: cumulative weights WITHOUT the half-weight offset, so each
# value sits at the upper edge of its weight span rather than its midpoint.
cdf = 100 * np.cumsum(weights) / np.sum(weights)
np.interp(np.arange(100), cdf, values)  # linear interpolation
Out[81]:
array([  10. ,   10. ,   10. ,   10. ,   10. ,   10. ,   10. ,   10. ,
         10. ,   10. ,   10. ,   10. ,   10. ,   10. ,   10. ,   10. ,
         10. ,   10. ,   10. ,   10. ,   10. ,   10. ,   10. ,   10. ,
         10. ,   10. ,   12.6,   15.2,   17.8,   20.4,   23. ,   25.6,
         28.2,   30.8,   33.4,   36. ,   38.6,   41.2,   43.8,   46.4,
         49. ,   51.6,   54.2,   56.8,   59.4,   62. ,   64.6,   67.2,
         69.8,   72.4,   75. ,   76. ,   77. ,   78. ,   79. ,   80. ,
         81. ,   82. ,   83. ,   84. ,   85. ,   86. ,   87. ,   88. ,
         89. ,   90. ,   91. ,   92. ,   93. ,   94. ,   95. ,   96. ,
         97. ,   98. ,   99. ,  100. ,  102. ,  104. ,  106. ,  108. ,
        110. ,  112. ,  114. ,  116. ,  118. ,  120. ,  122. ,  124. ,
        126. ,  128. ,  130. ,  132. ,  134. ,  136. ,  138. ,  140. ,
        142. ,  144. ,  146. ,  148. ])
In [82]:
# Show the sorted weights and values that went into the interpolation above.
weights, values
Out[82]:
((1.0, 1, 1, 1.0), (10, 75, 100, 150))
In [83]:
# The unshifted cdf: cumulative weights as a percentage of the total weight.
cdf
Out[83]:
array([  25.,   50.,   75.,  100.])
In [84]:
# Second data set, evaluated with equal (unit) weights.
values2 = np.array([15, 20, 35, 40, 50])
wp3 = weighted_percentile(
    np.arange(100),
    values2,
    np.ones(values2.shape),
)

p.figure(figsize=(8 * 1.618, 8))
p.scatter(range(100), wp3, c='green')
Out[84]:
<matplotlib.collections.PathCollection at 0x7f14bf50ceb8>
In [85]:
# Linear Interpolation Between Closest Ranks method via wikipedia says:
#     [15, 20, 27.5, 50]
weighted_percentile(
    np.array([5, 30, 40, 95]),
    values2,
    np.ones(values2.shape),
)
Out[85]:
array([ 15. ,  20. ,  27.5,  50. ])
In [ ]: