Skip to content

Instantly share code, notes, and snippets.

@menski
Created November 29, 2011 16:24
Show Gist options
  • Save menski/1405404 to your computer and use it in GitHub Desktop.
Save menski/1405404 to your computer and use it in GitHub Desktop.
Score algorithm tests for servload
#!/usr/bin/env python3
import math
import collections
import functools
import subprocess
# Highest score a scoring method hands out (given to elements on the median).
MAX_SCORE = 100

# A named test data set: ``name`` is a label, ``data`` the values to score.
Sample = collections.namedtuple(typename='Sample', field_names='name, data')

# Outcome of one scoring method: ``data`` holds the computed statistics,
# ``scores`` the (element, score) pairs.
Result = collections.namedtuple(typename='Result', field_names='data, scores')
def create_result(data):
    """Build a Result holding ``data`` and a fresh, empty score list."""
    empty_scores = []
    return Result(data, empty_scores)
def add_sample(sample_set, name, data):
    """Store a Sample built from ``name`` and ``data`` in ``sample_set``.

    ``data`` is converted to a tuple first, so the resulting Sample is
    hashable and can live in a set.
    """
    frozen = tuple(data)
    sample_set.add(Sample(name, frozen))
def create_samples():
    """Return the set of built-in samples used to exercise the scorers."""
    definitions = (
        ('evenly distributed', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
        ('runaway', [1, 2, 1, 3, 4, 4, 4, 6, 6, 8, 9, 9, 26]),
        ('min/max agglomeration',
         [1, 1, 1, 1, 1, 5, 6, 7, 8, 15, 15, 15, 15, 15, 15]),
        ('median agglomeration',
         [2, 6, 9, 10, 11, 12, 13, 14, 14, 15, 15, 15, 16, 17, 17, 20, 22,
          22, 27, 29, 30, 34]),
        ('random 1',
         [1, 6, 8, 8, 5, 5, 6, 0, 8, 1, 9, 7, 13, 15, 11, 23, 24, 12, 11,
          18, 24, 26, 28, 14, 20, 27, 17, 22, 17, 18, 20, 18, 12, 24, 10, 28,
          29, 25, 29, 12, 23, 22, 28, 20, 22, 27, 28, 30, 21, 16, 28, 25, 19,
          16, 28, 29, 11, 27, 25, 15, 27, 131, 100, 81, 118, 140, 77, 167,
          154, 168, 174, 155, 108, 37, 85, 90, 124, 67, 161, 111, 41]),
        ('random 2',
         [40, 16, 28, 6, 40, 17, 29, 18, 30, 76, 50, 67, 60, 100, 55, 56, 90,
          82, 85, 97, 97, 79, 96, 72, 82, 81, 89, 94, 58, 52, 53, 94, 61, 87,
          73, 65, 64, 63, 60, 69, 87, 85, 56, 98, 64, 62, 81, 68, 91, 72, 98,
          56, 87, 76, 84, 66, 73, 66, 154, 175, 167, 193, 112, 151, 186, 119,
          128, 130, 125]),
    )
    collected = set()
    for label, values in definitions:
        add_sample(collected, label, values)
    return collected
def test_sample(sample, methods):
    """Run every scoring method on ``sample``, then print and plot the scores.

    The result table has one header row (the sorted sample values taken
    from the first method's scores) followed by one row of scores per
    method.  The first two cells of each row are a label and a separator.
    """
    print('Sample: {} (elements: {})'.format(sample.name, len(sample.data)))
    print('Methods:')
    rows = []
    for index, scorer in enumerate(methods, 1):
        outcome = scorer(sample.data)
        print('\t{:>2}: {} {}'.format(index, scorer.__name__, outcome.data))
        if len(rows) == 0:
            # The header is built once, from the first method's elements.
            rows.append(['method', '-' * 6]
                        + [pair[0] for pair in outcome.scores])
        rows.append([index, '-' * 3] + [pair[1] for pair in outcome.scores])
    print_results(rows, methods)
    gnuplot(sample, methods, rows)
def print_results(table, methods):
    """Print the result table, one output line per table column.

    Args:
        table: list of equally long rows; row 0 is the header, every other
            row belongs to one method.  The table is printed transposed,
            so output line ``i`` shows cell ``i`` of every row.
        methods: the scoring methods; only their count is used, to build
            one centred 3-wide column per method after the 10-wide header
            column.

    An empty table (no methods were run) prints just the heading and the
    trailing blank line instead of raising ``IndexError``.
    """
    print('Results:')
    line_fmt = '{:>10}'
    line_fmt += ' | {:^3}' * len(methods)
    # zip(*table) transposes the rows and yields nothing for an empty table.
    for column in zip(*table):
        print(line_fmt.format(*column))
    print()
def gnuplot(sample, methods, table):
    """Plot entries of result table for sample.

    Starts a ``gnuplot`` process that reads commands from its stdin, sends
    one inline data set per method (drawn with line-points) plus a
    parametric ``const,t`` curve titled "median", then waits for the user
    to press enter before telling gnuplot to quit.

    Args:
        sample: Sample whose ``name`` becomes the plot title and whose
            ``data`` is used to compute the median.
        methods: scoring functions; their ``__name__`` labels the curves.
        table: result table where row 0 holds the element values and each
            later row holds one method's scores; the first two cells of
            every row are labels and are skipped.

    Returns:
        The exit status of the gnuplot process.
    """
    gplot = subprocess.Popen(['gnuplot', '-'], stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, stdin=subprocess.PIPE)

    def send(msg):
        gplot.stdin.write('{}\n'.format(msg).encode())

    send('set title "{}"'.format(sample.name))
    send('set xlabel "value"')
    send('set ylabel "score"')
    send('set key outside')
    send('set parametric')
    send('const={:.3f}'.format(median(sample.data)))
    send('set trange[0:{}]'.format(MAX_SCORE))
    send('set yrange [0:{}]'.format(MAX_SCORE))
    send('set mytics 5')
    send('set ytics 5')
    send('set grid')
    plotcmd = 'plot const,t t "median",'
    plotcmd += ','.join(' "-" t "{}" w lp'.format(method.__name__)
                        for method in methods)
    send(plotcmd)
    header = table[0]
    for row in table[1:]:
        # Cells 0 and 1 are the label and separator, not data points.
        for value, score in zip(header[2:], row[2:]):
            send('{} {}'.format(value, score))
        # 'e' terminates one inline ("-") data set.
        send('e')
    # The stdin pipe is buffered: without a flush the commands may still
    # sit in this process while we block on input(), so nothing is drawn.
    gplot.stdin.flush()
    input('Press key to continue')
    send('quit')
    # communicate() flushes and closes stdin and drains stdout/stderr; a
    # bare wait() can deadlock once gnuplot fills one of the unread pipes.
    gplot.communicate()
    return gplot.returncode
def median(elements):
    """Return the median of ``elements``.

    For an odd number of elements this is the middle value of the sorted
    data; for an even number it is the mean of the two middle values.
    """
    ordered = sorted(elements)
    count = len(elements)
    middle = int(count / 2)
    if count % 2 != 1:
        return (ordered[middle - 1] + ordered[middle]) / 2
    return ordered[middle]
def std(elements, m):
    """Return the root of the mean squared distance of ``elements`` to ``m``."""
    squared = [(value - m) ** 2 for value in elements]
    mean_square = sum(squared) / len(elements)
    return math.sqrt(mean_square)
def score_std_norm_log(elements):
    """Calculate score with median, std and log_1/2

    Each element's distance from the median is normalised by the standard
    deviation (taken around the median); the score is the base-1/2
    logarithm of that ratio, rounded and clamped to ``0..MAX_SCORE``.
    Elements exactly on the median get ``MAX_SCORE``.

    Returns:
        A Result whose ``data`` holds ``median`` and ``std`` and whose
        ``scores`` lists ``(element, score)`` pairs in ascending element
        order.
    """
    m = median(elements)
    s = std(elements, m)
    result = create_result({'median': m, 'std': s})
    for e in sorted(elements):
        dist = abs(e - m)
        if s == 0:
            # Zero spread would divide by zero; treat the ratio as 0 on
            # the median and as infinite (score 0) anywhere else.
            score = MAX_SCORE if dist == 0 else 0
        else:
            n = dist / s
            score = MAX_SCORE if n == 0 else round(math.log(n, 1 / 2))
            # Clamp both ways: a tiny non-zero ratio must not score
            # higher than an element sitting exactly on the median.
            score = min(max(score, 0), MAX_SCORE)
        result.scores.append((e, score))
    return result
def score_std(elements):
    """Calculate score with median and std

    An element scores ``MAX_SCORE * (1 - dist / std)``, rounded and
    clamped at 0, where ``dist`` is its absolute distance from the median
    and ``std`` is the standard deviation taken around the median.

    Returns:
        A Result whose ``data`` holds ``median`` and ``std`` and whose
        ``scores`` lists ``(element, score)`` pairs in ascending element
        order.
    """
    m = median(elements)
    s = std(elements, m)
    result = create_result({'median': m, 'std': s})
    for e in sorted(elements):
        dist = abs(m - e)
        if s == 0:
            # Zero spread would divide by zero; use the limit of the
            # formula: full score on the median, nothing elsewhere.
            score = MAX_SCORE if dist == 0 else 0
        else:
            score = max(round(MAX_SCORE * (1 - dist / s)), 0)
        result.scores.append((e, score))
    return result
def score_absolute_deviation(elements):
    """Calculate score with absolute deviation
    see http://en.wikipedia.org/wiki/Average_absolute_deviation

    An element scores ``MAX_SCORE * (1 - dist / avg_abs_dev)``, rounded
    and clamped at 0, where ``dist`` is its absolute distance from the
    median and ``avg_abs_dev`` is the mean of those distances.

    Returns:
        A Result whose ``data`` holds ``median`` and ``avg_abs_dev`` and
        whose ``scores`` lists ``(element, score)`` pairs in ascending
        element order.
    """
    median_element = median(elements)
    avg_abs_dev = 1 / len(elements) * sum(abs(element - median_element)
                                          for element in elements)
    result = create_result({'median': median_element,
                            'avg_abs_dev': avg_abs_dev})
    for e in sorted(elements):
        dist = abs(e - median_element)
        if avg_abs_dev == 0:
            # Zero deviation would divide by zero; use the limit of the
            # formula: full score on the median, nothing elsewhere.
            score = MAX_SCORE if dist == 0 else 0
        else:
            score = max(round(MAX_SCORE * (1 - dist / avg_abs_dev)), 0)
        result.scores.append((e, score))
    return result
def score_median_absolute_deviation(elements):
    """Calculate score with median absolute deviation
    see http://en.wikipedia.org/wiki/Median_absolute_deviation

    An element scores ``MAX_SCORE * (1 - dist / median_abs_dev)``, rounded
    and clamped at 0, where ``dist`` is its absolute distance from the
    median and ``median_abs_dev`` is the median of those distances.

    Returns:
        A Result whose ``data`` holds ``median`` and ``median_abs_dev``
        and whose ``scores`` lists ``(element, score)`` pairs in ascending
        element order.
    """
    median_element = median(elements)
    median_abs_dev = median([abs(e - median_element) for e in elements])
    result = create_result({'median': median_element,
                            'median_abs_dev': median_abs_dev})
    for e in sorted(elements):
        dist = abs(e - median_element)
        if median_abs_dev == 0:
            # The MAD is 0 whenever more than half of the elements equal
            # the median (e.g. [1, 1, 1, 5]); dividing would raise.  Use
            # the limit of the formula: full score on the median, nothing
            # elsewhere.
            score = MAX_SCORE if dist == 0 else 0
        else:
            score = max(round(MAX_SCORE * (1 - dist / median_abs_dev)), 0)
        result.scores.append((e, score))
    return result
def main():
    """Run every scoring method against every built-in sample."""
    sample_set = create_samples()
    methods = (score_std_norm_log, score_std, score_absolute_deviation,
               score_median_absolute_deviation)
    # The samples live in a set keyed on strings, whose iteration order
    # varies between interpreter runs (hash randomization); sort by name
    # so the samples are always presented in the same order.
    for sample in sorted(sample_set, key=lambda item: item.name):
        test_sample(sample, methods)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment