Skip to content

Instantly share code, notes, and snippets.

@g-i-o-
Created December 5, 2013 07:03
Show Gist options
  • Save g-i-o-/7801292 to your computer and use it in GitHub Desktop.
Save g-i-o-/7801292 to your computer and use it in GitHub Desktop.
So... you have an unsorted array of numbers and need a approximation to the median, but you don't want to waste a lot space nor sort the array, not even partially...
def sample_median(X):
"Returns an aproximate for the sample median for the array X"
n, xmax, xmin = 0, 0, 0
for x in X: # O(x)
if n:
xmax = max(x, xmax)
xmin = min(x, xmin)
else:
xmax = xmin = x
n+=1
a50 = n/2.0 +1
FREC_SIZE=16
below = 0
while True: # O(x log x)
Fcount, Fmin, Fmax = [0]*FREC_SIZE, [0]*FREC_SIZE,[0]*FREC_SIZE
for x in X: # O(x)
if x < xmin or x > xmax:
continue
x_idx = int((x - xmin) * (FREC_SIZE-1) / (xmax - xmin))
if Fcount[x_idx]:
Fmax[x_idx] = max(x, Fmax[x_idx])
Fmin[x_idx] = min(x, Fmin[x_idx])
else:
Fmax[x_idx] = Fmin[x_idx] = x
Fcount[x_idx] += 1
# print "Fcount:%r\nFmax:%r\nFmin:%r"%(Fcount, Fmax, Fmin)
a = below
for bin_idx in range(FREC_SIZE):
la = a
a += Fcount[bin_idx]
if a >= a50:
# print "turning point in %s (la:%s, a:%s)"%(bin_idx, la, a)
xmin, xmax = Fmin[bin_idx], Fmax[bin_idx]
below = la
break
#break
if xmax == xmin:
return xmax
if __name__ == '__main__':
import random
X = [random.randint(0, 10033) / 10000.0 for x in range(100)]
print "X:%r"%X
print "Sample median :%r"%sample_median(X)
X.sort()
lenX = len(X)
print "Half of len(X) is %s"%(lenX/2.0)
print zip(range(lenX/2-2, lenX/2+3), X[lenX/2 - 2 : lenX/2 + 3])
if lenX%2 == 0:
print "len(X) is even, so real median is avg(X[%s:%s]) == avg(%s, %s) = %s"%(
lenX/2-1, lenX/2, X[lenX/2-1], X[lenX/2], (X[lenX/2-1]+X[lenX/2])/2.0
)
else:
print "len(X) is odd, so real median is X[%s] = %s"%(lenX/2 + 1, X[lenX/2 + 1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment