Created
November 29, 2011 16:24
-
-
Save menski/1405404 to your computer and use it in GitHub Desktop.
Score algorithm tests for servload
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import math | |
import collections | |
import functools | |
import subprocess | |
# Top of the score scale; also used as the axis range when plotting.
MAX_SCORE = 100

# A named input series: a display name plus the values to be scored.
Sample = collections.namedtuple('Sample', ['name', 'data'])

# Outcome of one scoring method: summary data plus the collected scores.
Result = collections.namedtuple('Result', ['data', 'scores'])
def create_result(data):
    """Build a Result that holds ``data`` and a fresh, empty score list."""
    return Result(data, [])
def add_sample(sample_set, name, data):
    """Insert a Sample called ``name`` into ``sample_set``.

    ``data`` is converted to a tuple before it is stored in the Sample.
    """
    frozen = tuple(data)
    sample_set.add(Sample(name, frozen))
def create_samples():
    """Return the set of built-in Sample series used to try out the scorers."""
    named_series = (
        ('evenly distributed', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
        ('runaway', [1, 2, 1, 3, 4, 4, 4, 6, 6, 8, 9, 9, 26]),
        ('min/max agglomeration',
         [1, 1, 1, 1, 1, 5, 6, 7, 8, 15, 15, 15, 15, 15, 15]),
        ('median agglomeration',
         [2, 6, 9, 10, 11, 12, 13, 14, 14, 15, 15, 15, 16, 17, 17, 20, 22,
          22, 27, 29, 30, 34]),
        ('random 1',
         [1, 6, 8, 8, 5, 5, 6, 0, 8, 1, 9, 7, 13, 15, 11, 23, 24, 12, 11,
          18, 24, 26, 28, 14, 20, 27, 17, 22, 17, 18, 20, 18, 12, 24, 10, 28,
          29, 25, 29, 12, 23, 22, 28, 20, 22, 27, 28, 30, 21, 16, 28, 25, 19,
          16, 28, 29, 11, 27, 25, 15, 27, 131, 100, 81, 118, 140, 77, 167,
          154, 168, 174, 155, 108, 37, 85, 90, 124, 67, 161, 111, 41]),
        ('random 2',
         [40, 16, 28, 6, 40, 17, 29, 18, 30, 76, 50, 67, 60, 100, 55, 56, 90,
          82, 85, 97, 97, 79, 96, 72, 82, 81, 89, 94, 58, 52, 53, 94, 61, 87,
          73, 65, 64, 63, 60, 69, 87, 85, 56, 98, 64, 62, 81, 68, 91, 72, 98,
          56, 87, 76, 84, 66, 73, 66, 154, 175, 167, 193, 112, 151, 186, 119,
          128, 130, 125]),
    )
    sample_set = set()
    for name, values in named_series:
        add_sample(sample_set, name, values)
    return sample_set
def test_sample(sample, methods):
    """Run every scoring method on ``sample``, then print and plot the scores.

    The collected table has one header row (the scored values) followed by
    one row per method; the first two cells of each row are a label and a
    separator.
    """
    print('Sample: {} (elements: {})'.format(sample.name, len(sample.data)))
    print('Methods:')
    table = []
    for nr, method in enumerate(methods, 1):
        result = method(sample.data)
        print('\t{:>2}: {} {}'.format(nr, method.__name__, result.data))
        if len(table) == 0:
            # The first method processed supplies the header row.
            values = [pair[0] for pair in result.scores]
            table.append(['method', '-' * 6] + values)
        points = [pair[1] for pair in result.scores]
        table.append([nr, '-' * 3] + points)
    print_results(table, methods)
    gnuplot(sample, methods, table)
def print_results(table, methods):
    """Print the result table transposed, one output line per table column.

    Each printed line shows cell ``i`` of every row in ``table``: the header
    cell first, followed by one cell per method.
    """
    print('Results:')
    row_format = ''.join(['{:>10}'] + [' | {:^3}'] * len(methods))
    column_count = len(table[0])
    for column in range(column_count):
        cells = [row[column] for row in table]
        print(row_format.format(*cells))
    print()
def gnuplot(sample, methods, table):
    """Plot the score rows of ``table`` for ``sample`` with gnuplot.

    Starts a ``gnuplot -`` child process and feeds it one inline data set
    per method row of ``table``. Row 0 supplies the x values; the first two
    cells of every row are skipped. The function then waits for the user to
    press Enter and shuts gnuplot down.

    Args:
        sample: Sample whose name becomes the plot title and whose median is
            drawn as a vertical line.
        methods: scoring functions; their ``__name__`` labels the curves.
        table: header row followed by one row per method.

    Returns:
        The exit status of the gnuplot process.

    Raises:
        OSError: if the gnuplot executable cannot be started.
    """
    gplot = subprocess.Popen(['gnuplot', '-'], stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, stdin=subprocess.PIPE)

    def send(msg):
        """Queue one command or data line for gnuplot."""
        gplot.stdin.write('{}\n'.format(msg).encode())

    try:
        send('set title "{}"'.format(sample.name))
        send('set xlabel "value"')
        send('set ylabel "score"')
        send('set key outside')
        send('set parametric')
        send('const={:.3f}'.format(median(sample.data)))
        send('set trange[0:{}]'.format(MAX_SCORE))
        send('set yrange [0:{}]'.format(MAX_SCORE))
        send('set mytics 5')
        send('set ytics 5')
        send('set grid')
        plotcmd = 'plot const,t t "median",'
        plotcmd += ','.join(' "-" t "{}" w lp'.format(method.__name__)
                            for method in methods)
        send(plotcmd)
        x_values = table[0][2:]
        for row in table[1:]:
            for x, score in zip(x_values, row[2:]):
                send('{} {}'.format(x, score))
            send('e')
        # The pipe may be buffered: push everything out so the plot is
        # actually drawn before we block waiting for the user.
        gplot.stdin.flush()
        input('Press key to continue')
    finally:
        # communicate() flushes and closes stdin, drains stdout/stderr and
        # waits for the child, so gnuplot always receives 'quit' and the
        # unread output pipes cannot stall it.
        gplot.communicate('quit\n'.encode())
    return gplot.returncode
def median(elements):
    """Return the median of ``elements``.

    For an even number of elements the result is the mean of the two
    middle values of the sorted sequence.
    """
    ordered = sorted(elements)
    half, is_odd = divmod(len(elements), 2)
    if is_odd:
        return ordered[half]
    return (ordered[half - 1] + ordered[half]) / 2
def std(elements, m):
    """Return the root-mean-square deviation of ``elements`` around ``m``."""
    squared = [(value - m) ** 2 for value in elements]
    return math.sqrt(sum(squared) / len(elements))
def score_std_norm_log(elements):
    """Calculate score with median, std and log_1/2.

    Each element's distance to the median is normalized by the standard
    deviation (computed around the median); the score is the base-1/2
    logarithm of that normalized distance, rounded and clamped to
    ``[0, MAX_SCORE]``. Elements equal to the median score ``MAX_SCORE``.

    Returns:
        Result whose ``data`` holds the median and std and whose ``scores``
        is a list of ``(element, score)`` pairs in ascending element order.
    """
    m = median(elements)
    s = std(elements, m)
    result = create_result({'median': m, 'std': s})
    for e in sorted(elements):
        dist = abs(e - m)
        if dist == 0:
            score = MAX_SCORE
        elif s == 0:
            # No spread to normalize by: avoid dividing by zero.
            score = 0
        else:
            score = round(math.log(dist / s, 1 / 2))
            # Very small normalized distances would otherwise exceed the
            # top of the scale.
            score = min(max(score, 0), MAX_SCORE)
        result.scores.append((e, score))
    return result
def score_std(elements):
    """Calculate score with median and std.

    The score falls linearly from ``MAX_SCORE`` at the median to 0 at one
    standard deviation (computed around the median) away from it.

    Returns:
        Result whose ``data`` holds the median and std and whose ``scores``
        is a list of ``(element, score)`` pairs in ascending element order.
    """
    m = median(elements)
    s = std(elements, m)
    result = create_result({'median': m, 'std': s})
    for e in sorted(elements):
        dist = abs(m - e)
        if dist == 0:
            score = MAX_SCORE
        elif s == 0:
            # No spread to normalize by: avoid dividing by zero.
            score = 0
        else:
            score = max(round(MAX_SCORE * (1 - dist / s)), 0)
        result.scores.append((e, score))
    return result
def score_absolute_deviation(elements):
    """Calculate score with absolute deviation.

    see http://en.wikipedia.org/wiki/Average_absolute_deviation

    The score falls linearly from ``MAX_SCORE`` at the median to 0 at one
    average absolute deviation (around the median) away from it.

    Returns:
        Result whose ``data`` holds the median and the average absolute
        deviation and whose ``scores`` is a list of ``(element, score)``
        pairs in ascending element order.
    """
    median_element = median(elements)
    avg_abs_dev = 1 / len(elements) * sum(abs(element - median_element)
                                          for element in elements)
    result = create_result({'median': median_element,
                            'avg_abs_dev': avg_abs_dev})
    for e in sorted(elements):
        dist = abs(e - median_element)
        if dist == 0:
            score = MAX_SCORE
        elif avg_abs_dev == 0:
            # No spread to normalize by: avoid dividing by zero.
            score = 0
        else:
            score = max(round(MAX_SCORE * (1 - dist / avg_abs_dev)), 0)
        result.scores.append((e, score))
    return result
def score_median_absolute_deviation(elements):
    """Calculate score with median absolute deviation.

    see http://en.wikipedia.org/wiki/Median_absolute_deviation

    The score falls linearly from ``MAX_SCORE`` at the median to 0 at one
    median absolute deviation (MAD) away from it. When the MAD is 0, only
    elements equal to the median score ``MAX_SCORE``; all others score 0.

    Returns:
        Result whose ``data`` holds the median and the MAD and whose
        ``scores`` is a list of ``(element, score)`` pairs in ascending
        element order.
    """
    median_element = median(elements)
    median_abs_dev = median([abs(e - median_element) for e in elements])
    result = create_result({'median': median_element,
                            'median_abs_dev': median_abs_dev})
    for e in sorted(elements):
        dist = abs(e - median_element)
        if dist == 0:
            score = MAX_SCORE
        elif median_abs_dev == 0:
            # The MAD is 0 whenever more than half of the elements equal
            # the median; dividing by it would raise ZeroDivisionError.
            score = 0
        else:
            score = max(round(MAX_SCORE * (1 - dist / median_abs_dev)), 0)
        result.scores.append((e, score))
    return result
def main():
    """Score, print and plot every built-in sample with each scoring method."""
    scorers = (
        score_std_norm_log,
        score_std,
        score_absolute_deviation,
        score_median_absolute_deviation,
    )
    for sample in create_samples():
        test_sample(sample, scorers)
# Run the experiments only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment