# https://en.wikipedia.org/wiki/Percentile#Definition_of_the_Weighted_Percentile_method
# Sample data: weights[i] is the weight attached to values[i].
weights = [1, 1.0, 1, 1.0]
values = [100, 150, 75, 10]
# Sort the (value, weight) pairs by value; both names are rebound to tuples.
values, weights = zip(*sorted(zip(values, weights), key=lambda vw: vw[0]))
values, weights


def Sn(ws):
    """Return a function n -> S_n, the partial sum ws[0] + ... + ws[n]."""
    def partial_sum(n):
        return sum(ws[:n + 1])
    return partial_sum


# Now compute all nth partial sums (one per weight, whatever the sample size):
list(map(Sn(weights), range(len(weights))))
# The last partial sum equals the total weight.
len(weights), sum(weights), Sn(weights)(len(weights) - 1)


def _pn(ws):
    """Return a function n -> p_n, the percent rank of the n-th sorted value.

    p_n = 100 / S_N * (S_n - w_n / 2), where S_n is the n-th partial sum of
    the weights and S_N (the last partial sum) is their total.
    """
    def percent_rank(n):
        partial_sum = Sn(ws)
        return 100 / partial_sum(len(ws) - 1) * (partial_sum(n) - ws[n] / 2.0)
    return percent_rank


list(map(_pn(weights), range(len(weights))))
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as p
import numpy as np
def v(vs, ws):
    """Build a weighted-percentile lookup using explicit interpolation.

    The n-th value is assigned the percent rank
    p_n = 100 / S_N * (S_n - w_n / 2), where S_n is the n-th partial sum of
    the weights and S_N their total. A requested percentile P is linearly
    interpolated between the two values whose percent ranks bracket it, and
    clamped to the first / last value outside the covered range.

    No sorting is done here: vs must already be in ascending order, with ws
    arranged to match.

    vs :: Sequence of values, sorted ascending
    ws :: Sequence of weights, ws[i] belonging to vs[i]
    return :: Function perc(P) giving the value at percentile P
    """
    # The percent ranks depend only on the weights, so compute them once here
    # instead of re-summing the weights for every rank on every lookup.
    total = sum(ws)
    ranks = []
    running = 0
    for w in ws:
        running += w
        ranks.append(100 / total * (running - w / 2.0))

    def perc(P):
        # Find the first percent rank strictly above P.
        for k, rank in enumerate(ranks):
            if rank > P:
                if k == 0:
                    # P lies below the first percent rank: clamp to vs[0].
                    return vs[0]
                below = ranks[k - 1]
                # Linear interpolation between the bracketing values.
                return vs[k - 1] + (P - below) / (rank - below) * (vs[k] - vs[k - 1])
        # P is at or above the last percent rank: clamp to the last value.
        return vs[-1]

    return perc
# Evaluate the interpolating lookup returned by v at the integer percentiles
# 0..99 (building the lookup once), then scatter-plot value against percentile.
wp0 = list(map(v(values, weights), range(100)))
p.figure(figsize=(8 * 1.618, 8))
p.scatter(range(100), np.array(wp0))
def weighted_percentile(percs, vs, ws):
    """Use linear interpolation to calculate the weighted percentile.

    Values and weights are first sorted by value. Each sorted value is then
    assigned the percent rank 100 * (S_n - w_n / 2) / S_N, where S_n is the
    cumulative weight up to and including that value and S_N is the total
    weight. np.interp locates the two percent ranks bracketing each requested
    percentile and linearly interpolates between their values; requests
    outside the covered range are clamped to the smallest / largest value.

    percs :: Percentile(s) we want to calculate (scalar or array-like)
    vs :: Array-like of values we are computing the percentile of
    ws :: Array-like of weights for our corresponding values
    return :: Value(s) found at the requested percentile(s)

    Raises ValueError if vs and ws do not have the same shape.
    """
    # Accept plain lists/tuples as well as ndarrays; the fancy indexing and
    # .cumsum() below require real arrays.
    vs = np.asarray(vs)
    ws = np.asarray(ws)
    if vs.shape != ws.shape:
        # Without this check a longer ws would be silently truncated by ws[idx].
        raise ValueError(
            "vs and ws must have the same shape, got %s and %s"
            % (vs.shape, ws.shape)
        )
    idx = np.argsort(vs)
    vs, ws = vs[idx], ws[idx]  # weights and values sorted by value
    # Subtracting half of each weight places a value's percent rank at the
    # midpoint of its own weight span.
    cdf = 100 * (ws.cumsum() - ws / 2.0) / ws.sum()
    return np.interp(percs, cdf, vs)  # linear interpolation
# Evaluate the np.interp-based weighted_percentile at the integer percentiles
# 0..99 on the same sample data, and plot value against percentile.
wp1 = weighted_percentile(np.arange(100), np.array(values), np.array(weights))
p.figure(figsize=(8*1.618, 8))
p.scatter(range(100), wp1)
p.xlabel("percentile"); p.ylabel("value")
# Overlay both results in one figure: wp1 (np.interp version) in green and
# wp0 (computed earlier from v) in red.
p.figure(figsize=(8*1.618, 8))
p.scatter(range(100), wp1, c='green')
p.scatter(range(100), wp0, c='red')
# For comparison: a cdf built from the plain cumulative weights, i.e. without
# the "- ws / 2.0" half-weight shift that weighted_percentile applies.
cdf = 100 * np.array(weights).cumsum() / np.array(weights).sum()
np.interp(np.arange(100), cdf, values) # linear interpolation against the unshifted cdf
# Display the inputs and the unshifted cdf for inspection.
weights, values
cdf
# Second data set with unit weights (np.ones), so every value counts equally.
# NOTE(review): presumably the worked example from the Wikipedia article - confirm.
values2 = np.array([15, 20, 35, 40, 50])
wp3 = weighted_percentile(np.arange(100), values2, np.ones(len(values2)))
p.figure(figsize=(8*1.618, 8))
p.scatter(range(100), wp3, c='green')
# Spot-check the 5th, 30th, 40th and 95th percentiles against the reference below.
weighted_percentile(np.array([5,30,40,95]), values2, np.ones(len(values2)))
# According to Wikipedia, the "Linear Interpolation Between Closest Ranks"
# method gives, for these four percentiles:
# [15, 20, 27.5, 50]