-
-
Save aslagle/d6f9d8c4cb2d437914572db372c562b5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Result for In Brooklyn | |
[('bklyn', 0.999444756826846), ('nypl', 0.9512254693898606), ('nyu_press', 0.003991972859830552), ('ia', 0.0021984559630425313), ('unm', 1.8230146414748216e-09), ('albany', 3.227392712409606e-11)] | |
['bklyn', 'nypl', 'ia', 'nyu_press'] expected | |
------------------------------------ | |
Result for In Syracuse - 200 km west of Albany | |
[('nypl', 0.15723651253820076), ('bklyn', 0.1571498614504659), ('ia', 0.0021984559630425313), ('nyu_press', 0.0006276865004806642), ('unm', 1.8230146414748216e-09), ('albany', 7.531464476485446e-10)] | |
['nypl', 'bklyn', 'ia', 'nyu_press'] expected | |
------------------------------------ | |
Result for In Albany | |
[('albany', 0.9932613855290392), ('nypl', 0.3166354528370172), ('bklyn', 0.3164609589744859), ('ia', 0.0021984559630425313), ('nyu_press', 0.0012640053897856058), ('unm', 1.8230146414748216e-09)] | |
['albany', 'nypl', 'bklyn', 'ia', 'nyu_press'] expected | |
------------------------------------ | |
Result for In Queens | |
[('nypl', 0.9512254693898606), ('bklyn', 0.9507012608566567), ('nyu_press', 0.003991972859830552), ('ia', 0.0021984559630425313), ('unm', 1.8230146414748216e-09), ('albany', 3.227392712409606e-11)] | |
['nypl', 'bklyn', 'ia', 'nyu_press'] expected | |
------------------------------------ | |
Result for In Las Cruces, NM | |
[('unm', 0.005959466533252207), ('ia', 0.0021984559630425313), ('nyu_press', 1.6526519014779194e-10), ('nypl', 2.1313139666483575e-138), ('bklyn', 2.130139426011877e-138), ('albany', 2.476517792606116e-160)] | |
['unm', 'ia'] expected | |
------------------------------------ | |
Result for In New Jersey | |
[('nypl', 0.00524749658055757), ('bklyn', 0.005244604750414239), ('ia', 0.0021984559630425313), ('nyu_press', 2.0947951030993548e-05), ('unm', 1.8230146414748216e-09), ('albany', 3.227392712409606e-11)] | |
['nypl', 'bklyn', 'ia', 'nyu_press'] expected | |
------------------------------------ | |
Result for In Manhattan, Spanish speaker | |
[('nypl', 0.9999958421062768), ('bklyn', 0.9507012608566567), ('nyu_press', 0.003991972859830552), ('ia', 0.0021984559630425313), ('unm', 1.8230146414748216e-09), ('albany', 3.227392712409606e-11)] | |
['nypl', 'bklyn', 'ia', 'unm'] expected | |
------------------------------------ | |
Result for In Manhattan | |
[('nypl', 0.9999958421062768), ('bklyn', 0.9507012608566567), ('nyu_press', 0.003991972859830552), ('ia', 0.0021984559630425313), ('unm', 1.8230146414748216e-09), ('albany', 3.227392712409606e-11)] | |
['nypl', 'bklyn', 'ia', 'nyu_press'] expected | |
------------------------------------ | |
Result for In Albany, Russian speaker | |
[('albany', 0.9932613855290392), ('nypl', 0.3166354528370172), ('bklyn', 0.3164609589744859), ('ia', 0.0021984559630425313), ('nyu_press', 0.0012640053897856058), ('unm', 1.8230146414748216e-09)] | |
['nypl', 'albany', 'ia'] expected | |
------------------------------------ | |
Result for In Manhattan, with a11y features on | |
[('nypl', 0.9999958421062768), ('bklyn', 0.9507012608566567), ('bard', 0.8957489722975418), ('nyu_press', 0.003991972859830552), ('ia', 0.0021984559630425313), ('unm', 1.8230146414748216e-09), ('albany', 3.227392712409606e-11)] | |
['nypl', 'bklyn', 'bard', 'ia', 'nyu_press'] expected | |
------------------------------------ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Catalog of the libraries being ranked, keyed by short library name.
#
# Fields of each entry:
#   collections  - collection size per language code; compute_score() compares
#                  a library's size with the largest collection in the
#                  scenario's language.
#   audience     - 'general' libraries are scored for every scenario; any other
#                  value is scored only for scenarios with the same audience.
#   focus_area   - size of the library's focus area (presumably km^2 -- TODO
#                  confirm units); larger focus areas are penalized by
#                  compute_score().
#   service_area - size of the library's service area (same presumed units);
#                  not read by compute_score() in this file.
libraries = dict()
libraries['nypl'] = dict(
    collections=dict(en=150000, es=20000, ru=5000),
    audience='general',
    focus_area=321,
    service_area=141000,
)
libraries['bklyn'] = dict(
    collections=dict(en=75000, es=10000),
    audience='general',
    focus_area=180,
    service_area=141000,
)
libraries['albany'] = dict(
    collections=dict(en=50000, es=5000),
    audience='general',
    focus_area=56,
    service_area=56,
)
libraries['nyu_lib'] = dict(
    collections=dict(en=100000),
    audience='secondary',
    focus_area=789,
    service_area=789,
)
libraries['nyu_press'] = dict(
    collections=dict(en=40),
    audience='general',
    focus_area=789,
    service_area=510000000,
)
libraries['unm'] = dict(
    collections=dict(en=60, es=10),
    audience='general',
    focus_area=315000,
    service_area=510000000,
)
libraries['bard'] = dict(
    collections=dict(en=100000),
    audience='a11y',
    focus_area=10000000,
    service_area=10000000,
)
libraries['ia'] = dict(
    collections=dict(en=10000000, es=1000, ru=1000),
    audience='general',
    focus_area=510000000,
    service_area=510000000,
)
# Patron scenarios used to exercise compute_score(), keyed by a description.
#
# Fields of each entry:
#   focus_distances   - distance from the patron to each library's focus area,
#                       keyed by library name (presumably km, going by the
#                       "200 km west of Albany" scenario -- TODO confirm).
#   service_distances - distance from the patron to each library's service
#                       area, keyed by library name (0 appears to mean the
#                       patron is inside the area).
#   lang              - language code used to pick the relevant collection.
#   audience          - patron audience, matched against library audiences.
#   expected          - the ranking the author expects; only printed next to
#                       the computed ranking, never asserted.
scenarios = dict()
scenarios['In Manhattan'] = dict(
    focus_distances=dict(nypl=0, bklyn=10, albany=230,
                         nyu_lib=0, nyu_press=0, unm=3000,
                         bard=0, ia=0),
    service_distances=dict(nypl=0, bklyn=0, albany=230,
                           nyu_lib=0, nyu_press=0, unm=0,
                           bard=0, ia=0),
    lang='en',
    audience='general',
    expected=['nypl', 'bklyn', 'ia', 'nyu_press'],
)
scenarios['In Brooklyn'] = dict(
    focus_distances=dict(nypl=10, bklyn=0, albany=230,
                         nyu_lib=0, nyu_press=0, unm=3000,
                         bard=0, ia=0),
    service_distances=dict(nypl=0, bklyn=0, albany=230,
                           nyu_lib=0, nyu_press=0, unm=0,
                           bard=0, ia=0),
    lang='en',
    audience='general',
    expected=['bklyn', 'nypl', 'ia', 'nyu_press'],
)
scenarios['In Queens'] = dict(
    focus_distances=dict(nypl=10, bklyn=10, albany=230,
                         nyu_lib=0, nyu_press=0, unm=3000,
                         bard=0, ia=0),
    service_distances=dict(nypl=0, bklyn=0, albany=230,
                           nyu_lib=0, nyu_press=0, unm=0,
                           bard=0, ia=0),
    lang='en',
    audience='general',
    expected=['nypl', 'bklyn', 'ia', 'nyu_press'],
)
scenarios['In Albany'] = dict(
    focus_distances=dict(nypl=230, bklyn=230, albany=0,
                         nyu_lib=230, nyu_press=230, unm=3000,
                         bard=0, ia=0),
    service_distances=dict(nypl=0, bklyn=0, albany=0,
                           nyu_lib=0, nyu_press=0, unm=0,
                           bard=0, ia=0),
    lang='en',
    audience='general',
    expected=['albany', 'nypl', 'bklyn', 'ia', 'nyu_press'],
)
scenarios['In Syracuse - 200 km west of Albany'] = dict(
    focus_distances=dict(nypl=370, bklyn=370, albany=200,
                         nyu_lib=370, nyu_press=370, unm=3000,
                         bard=0, ia=0),
    service_distances=dict(nypl=0, bklyn=0, albany=200,
                           nyu_lib=0, nyu_press=0, unm=0,
                           bard=0, ia=0),
    lang='en',
    audience='general',
    expected=['nypl', 'bklyn', 'ia', 'nyu_press'],
)
scenarios['In New Jersey'] = dict(
    focus_distances=dict(nypl=50, bklyn=50, albany=230,
                         nyu_lib=50, nyu_press=50, unm=3000,
                         bard=0, ia=0),
    service_distances=dict(nypl=50, bklyn=50, albany=230,
                           nyu_lib=50, nyu_press=50, unm=0,
                           bard=0, ia=0),
    lang='en',
    audience='general',
    expected=['nypl', 'bklyn', 'ia', 'nyu_press'],
)
scenarios['In Las Cruces, NM'] = dict(
    focus_distances=dict(nypl=3400, bklyn=3400, albany=3500,
                         nyu_lib=3400, nyu_press=3400, unm=0,
                         bard=0, ia=0),
    service_distances=dict(nypl=3000, bklyn=3000, albany=3500,
                           nyu_lib=0, nyu_press=0, unm=0,
                           bard=0, ia=0),
    lang='en',
    audience='general',
    expected=['unm', 'ia'],
)
# NOTE(review): the two "... speaker" scenarios below still use lang='en', so
# they score exactly like their English counterparts. Presumably 'ru' / 'es'
# was intended -- confirm before changing, since it alters the rankings.
scenarios['In Albany, Russian speaker'] = dict(
    focus_distances=dict(nypl=230, bklyn=230, albany=0,
                         nyu_lib=230, nyu_press=230, unm=3000,
                         bard=0, ia=0),
    service_distances=dict(nypl=0, bklyn=0, albany=0,
                           nyu_lib=0, nyu_press=0, unm=0,
                           bard=0, ia=0),
    lang='en',
    audience='general',
    expected=['nypl', 'albany', 'ia'],
)
scenarios['In Manhattan, Spanish speaker'] = dict(
    focus_distances=dict(nypl=0, bklyn=10, albany=230,
                         nyu_lib=0, nyu_press=0, unm=3000,
                         bard=0, ia=0),
    service_distances=dict(nypl=0, bklyn=0, albany=230,
                           nyu_lib=0, nyu_press=0, unm=0,
                           bard=0, ia=0),
    lang='en',
    audience='general',
    expected=['nypl', 'bklyn', 'ia', 'unm'],
)
scenarios['In Manhattan, with a11y features on'] = dict(
    focus_distances=dict(nypl=0, bklyn=10, albany=230,
                         nyu_lib=0, nyu_press=0, unm=3000,
                         bard=0, ia=0),
    service_distances=dict(nypl=0, bklyn=0, albany=230,
                           nyu_lib=0, nyu_press=0, unm=0,
                           bard=0, ia=0),
    lang='en',
    audience='a11y',
    expected=['nypl', 'bklyn', 'bard', 'ia', 'nyu_press'],
)
# Tuning knobs for compute_score(). Each "*_factor" except audience_factor is
# an exponential decay rate passed to decay().

# Rate applied to (collection size / largest collection in that language).
collection_size_factor = 1000
# Multiplier applied when a non-'general' library matches the patron audience.
audience_factor = 1.01
# Decay rate per unit of distance from the library's focus area.
focus_area_distance_factor = 0.005
# Decay rate per unit of distance from the library's service area.
service_area_distance_factor = 0.1
# Decay rate per unit of focus-area size (smaller focus areas score higher).
focus_area_size_factor = 0.000000012
# Libraries must score strictly above this value to be listed in the results.
score_threshold = 0
from math import exp, log
from pdb import set_trace
def decay(rate, value):
    """Return the exponential decay weight ``exp(-rate * value)``.

    The result is 1.0 when ``rate * value`` is 0 and shrinks toward 0 as
    the product grows.
    """
    return exp(-1.0 * rate * value)
def compute_score(scenario, library, library_name):
    """Score how well ``library`` suits the patron described by ``scenario``.

    The score starts at 1 and is multiplied by a series of weights:
    an audience bonus, a collection-size weight, and decay weights for the
    distance to the focus area, the distance to the service area, and the
    size of the focus area.

    :param scenario: scenario dict with ``lang``, ``audience``,
        ``focus_distances`` and ``service_distances`` entries.
    :param library: library dict with ``collections``, ``audience`` and
        ``focus_area`` entries.
    :param library_name: key used to look the library up in the scenario's
        distance tables.
    :return: a non-negative score. 0 means the library is unsuitable: its
        audience is non-general and differs from the patron's, or it has no
        collection in the scenario language.
    """
    score = 1

    # Special-audience libraries are only offered to matching patrons, who
    # then get a small bonus; 'general' libraries are neutral.
    library_audience = library.get('audience')
    if library_audience != 'general':
        if scenario.get('audience') != library_audience:
            return 0
        score = score * audience_factor

    # A library without a collection in this language is treated as size 0
    # (score 0) instead of failing on arithmetic with None.
    lang = scenario.get('lang')
    collection_size = library.get('collections').get(lang) or 0
    sizes_for_lang = [
        lib.get('collections').get(lang) or 0 for lib in libraries.values()
    ]
    max_collection_for_lang = max(sizes_for_lang or [0])
    if not collection_size or not max_collection_for_lang:
        return 0

    # Collection size relative to the largest collection in this language.
    relative_size = 1.0 * collection_size / max_collection_for_lang
    score = score * (1 - decay(collection_size_factor, relative_size))

    # Farther from the focus / service areas means a lower score.
    focus_area_distance = scenario.get('focus_distances').get(library_name)
    service_area_distance = scenario.get('service_distances').get(library_name)
    score = score * decay(focus_area_distance_factor, focus_area_distance)
    score = score * decay(service_area_distance_factor, service_area_distance)

    # Libraries with a large focus area are penalized.
    focus_area_size = library.get('focus_area')
    score = score * decay(focus_area_size_factor, focus_area_size)
    return score
if __name__ == '__main__':
    # Score every library for every scenario and print the ranking next to
    # the expected one. dict.items() and single-argument print() behave the
    # same on Python 2 and Python 3.
    for scenario_name, scenario in scenarios.items():
        scores = dict()
        for library_name, library in libraries.items():
            score = compute_score(scenario, library, library_name)
            # Only libraries scoring above the threshold are listed.
            if score > score_threshold:
                scores[library_name] = score
        print("Result for %s" % scenario_name)
        # Highest score first.
        print(sorted(scores.items(), key=lambda x: -x[1]))
        print("%s  expected" % (scenario.get('expected'),))
        print("------------------------------------")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment