Skip to content

Instantly share code, notes, and snippets.

@aslagle

aslagle/output Secret

Created August 2, 2017 15:05
Show Gist options
  • Save aslagle/d6f9d8c4cb2d437914572db372c562b5 to your computer and use it in GitHub Desktop.
Save aslagle/d6f9d8c4cb2d437914572db372c562b5 to your computer and use it in GitHub Desktop.
Result for In Brooklyn
[('bklyn', 0.999444756826846), ('nypl', 0.9512254693898606), ('nyu_press', 0.003991972859830552), ('ia', 0.0021984559630425313), ('unm', 1.8230146414748216e-09), ('albany', 3.227392712409606e-11)]
['bklyn', 'nypl', 'ia', 'nyu_press'] expected
------------------------------------
Result for In Syracuse - 200 km west of Albany
[('nypl', 0.15723651253820076), ('bklyn', 0.1571498614504659), ('ia', 0.0021984559630425313), ('nyu_press', 0.0006276865004806642), ('unm', 1.8230146414748216e-09), ('albany', 7.531464476485446e-10)]
['nypl', 'bklyn', 'ia', 'nyu_press'] expected
------------------------------------
Result for In Albany
[('albany', 0.9932613855290392), ('nypl', 0.3166354528370172), ('bklyn', 0.3164609589744859), ('ia', 0.0021984559630425313), ('nyu_press', 0.0012640053897856058), ('unm', 1.8230146414748216e-09)]
['albany', 'nypl', 'bklyn', 'ia', 'nyu_press'] expected
------------------------------------
Result for In Queens
[('nypl', 0.9512254693898606), ('bklyn', 0.9507012608566567), ('nyu_press', 0.003991972859830552), ('ia', 0.0021984559630425313), ('unm', 1.8230146414748216e-09), ('albany', 3.227392712409606e-11)]
['nypl', 'bklyn', 'ia', 'nyu_press'] expected
------------------------------------
Result for In Las Cruces, NM
[('unm', 0.005959466533252207), ('ia', 0.0021984559630425313), ('nyu_press', 1.6526519014779194e-10), ('nypl', 2.1313139666483575e-138), ('bklyn', 2.130139426011877e-138), ('albany', 2.476517792606116e-160)]
['unm', 'ia'] expected
------------------------------------
Result for In New Jersey
[('nypl', 0.00524749658055757), ('bklyn', 0.005244604750414239), ('ia', 0.0021984559630425313), ('nyu_press', 2.0947951030993548e-05), ('unm', 1.8230146414748216e-09), ('albany', 3.227392712409606e-11)]
['nypl', 'bklyn', 'ia', 'nyu_press'] expected
------------------------------------
Result for In Manhattan, Spanish speaker
[('nypl', 0.9999958421062768), ('bklyn', 0.9507012608566567), ('nyu_press', 0.003991972859830552), ('ia', 0.0021984559630425313), ('unm', 1.8230146414748216e-09), ('albany', 3.227392712409606e-11)]
['nypl', 'bklyn', 'ia', 'unm'] expected
------------------------------------
Result for In Manhattan
[('nypl', 0.9999958421062768), ('bklyn', 0.9507012608566567), ('nyu_press', 0.003991972859830552), ('ia', 0.0021984559630425313), ('unm', 1.8230146414748216e-09), ('albany', 3.227392712409606e-11)]
['nypl', 'bklyn', 'ia', 'nyu_press'] expected
------------------------------------
Result for In Albany, Russian speaker
[('albany', 0.9932613855290392), ('nypl', 0.3166354528370172), ('bklyn', 0.3164609589744859), ('ia', 0.0021984559630425313), ('nyu_press', 0.0012640053897856058), ('unm', 1.8230146414748216e-09)]
['nypl', 'albany', 'ia'] expected
------------------------------------
Result for In Manhattan, with a11y features on
[('nypl', 0.9999958421062768), ('bklyn', 0.9507012608566567), ('bard', 0.8957489722975418), ('nyu_press', 0.003991972859830552), ('ia', 0.0021984559630425313), ('unm', 1.8230146414748216e-09), ('albany', 3.227392712409606e-11)]
['nypl', 'bklyn', 'bard', 'ia', 'nyu_press'] expected
------------------------------------
# Registry of candidate libraries, keyed by short name.
#
# Per-library fields:
#   collections  - collection size per language code (presumably a count of
#                  titles -- TODO confirm).
#   audience     - 'general', or a specialised audience tag that a scenario
#                  must match for the library to be scored at all.
#   focus_area   - size of the library's focus area; fed to a decay term in
#                  compute_score (values look like km^2 -- TODO confirm).
#   service_area - size of the library's service area; not read by
#                  compute_score in this file.
libraries = {
    'nypl': {
        'collections': {'en': 150000, 'es': 20000, 'ru': 5000},
        'audience': 'general',
        'focus_area': 321,
        'service_area': 141000,
    },
    'bklyn': {
        'collections': {'en': 75000, 'es': 10000},
        'audience': 'general',
        'focus_area': 180,
        'service_area': 141000,
    },
    'albany': {
        'collections': {'en': 50000, 'es': 5000},
        'audience': 'general',
        'focus_area': 56,
        'service_area': 56,
    },
    'nyu_lib': {
        'collections': {'en': 100000},
        'audience': 'secondary',
        'focus_area': 789,
        'service_area': 789,
    },
    'nyu_press': {
        'collections': {'en': 40},
        'audience': 'general',
        'focus_area': 789,
        'service_area': 510000000,
    },
    'unm': {
        'collections': {'en': 60, 'es': 10},
        'audience': 'general',
        'focus_area': 315000,
        'service_area': 510000000,
    },
    'bard': {
        'collections': {'en': 100000},
        'audience': 'a11y',
        'focus_area': 10000000,
        'service_area': 10000000,
    },
    'ia': {
        'collections': {'en': 10000000, 'es': 1000, 'ru': 1000},
        'audience': 'general',
        'focus_area': 510000000,
        'service_area': 510000000,
    },
}
# Patron scenarios used to exercise compute_score, keyed by a human-readable
# description.
#
# Per-scenario fields:
#   focus_distances   - distance from the patron to each library's focus
#                       area, keyed by library name (presumably km, going by
#                       the Syracuse scenario's name -- TODO confirm).
#   service_distances - distance from the patron to each library's service
#                       area, keyed by library name.
#   lang              - language code used to pick each library's collection.
#   audience          - audience tag compared against each library's audience.
#   expected          - the ranking the author hopes to see; only printed,
#                       never asserted.
scenarios = {
    'In Manhattan': {
        'focus_distances': {
            'nypl': 0, 'bklyn': 10, 'albany': 230,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 3000,
            'bard': 0, 'ia': 0,
        },
        'service_distances': {
            'nypl': 0, 'bklyn': 0, 'albany': 230,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 0,
            'bard': 0, 'ia': 0,
        },
        'lang': 'en',
        'audience': 'general',
        'expected': ['nypl', 'bklyn', 'ia', 'nyu_press'],
    },
    'In Brooklyn': {
        'focus_distances': {
            'nypl': 10, 'bklyn': 0, 'albany': 230,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 3000,
            'bard': 0, 'ia': 0,
        },
        'service_distances': {
            'nypl': 0, 'bklyn': 0, 'albany': 230,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 0,
            'bard': 0, 'ia': 0,
        },
        'lang': 'en',
        'audience': 'general',
        'expected': ['bklyn', 'nypl', 'ia', 'nyu_press'],
    },
    'In Queens': {
        'focus_distances': {
            'nypl': 10, 'bklyn': 10, 'albany': 230,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 3000,
            'bard': 0, 'ia': 0,
        },
        'service_distances': {
            'nypl': 0, 'bklyn': 0, 'albany': 230,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 0,
            'bard': 0, 'ia': 0,
        },
        'lang': 'en',
        'audience': 'general',
        'expected': ['nypl', 'bklyn', 'ia', 'nyu_press'],
    },
    'In Albany': {
        'focus_distances': {
            'nypl': 230, 'bklyn': 230, 'albany': 0,
            'nyu_lib': 230, 'nyu_press': 230, 'unm': 3000,
            'bard': 0, 'ia': 0,
        },
        'service_distances': {
            'nypl': 0, 'bklyn': 0, 'albany': 0,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 0,
            'bard': 0, 'ia': 0,
        },
        'lang': 'en',
        'audience': 'general',
        'expected': ['albany', 'nypl', 'bklyn', 'ia', 'nyu_press'],
    },
    'In Syracuse - 200 km west of Albany': {
        'focus_distances': {
            'nypl': 370, 'bklyn': 370, 'albany': 200,
            'nyu_lib': 370, 'nyu_press': 370, 'unm': 3000,
            'bard': 0, 'ia': 0,
        },
        'service_distances': {
            'nypl': 0, 'bklyn': 0, 'albany': 200,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 0,
            'bard': 0, 'ia': 0,
        },
        'lang': 'en',
        'audience': 'general',
        'expected': ['nypl', 'bklyn', 'ia', 'nyu_press'],
    },
    'In New Jersey': {
        'focus_distances': {
            'nypl': 50, 'bklyn': 50, 'albany': 230,
            'nyu_lib': 50, 'nyu_press': 50, 'unm': 3000,
            'bard': 0, 'ia': 0,
        },
        'service_distances': {
            'nypl': 50, 'bklyn': 50, 'albany': 230,
            'nyu_lib': 50, 'nyu_press': 50, 'unm': 0,
            'bard': 0, 'ia': 0,
        },
        'lang': 'en',
        'audience': 'general',
        'expected': ['nypl', 'bklyn', 'ia', 'nyu_press'],
    },
    'In Las Cruces, NM': {
        'focus_distances': {
            'nypl': 3400, 'bklyn': 3400, 'albany': 3500,
            'nyu_lib': 3400, 'nyu_press': 3400, 'unm': 0,
            'bard': 0, 'ia': 0,
        },
        'service_distances': {
            'nypl': 3000, 'bklyn': 3000, 'albany': 3500,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 0,
            'bard': 0, 'ia': 0,
        },
        'lang': 'en',
        'audience': 'general',
        'expected': ['unm', 'ia'],
    },
    # NOTE(review): named "Russian speaker" but lang is 'en', so this scores
    # exactly like 'In Albany'. Looks like it was meant to be 'ru' -- confirm
    # before changing, since not every library has a 'ru' collection.
    'In Albany, Russian speaker': {
        'focus_distances': {
            'nypl': 230, 'bklyn': 230, 'albany': 0,
            'nyu_lib': 230, 'nyu_press': 230, 'unm': 3000,
            'bard': 0, 'ia': 0,
        },
        'service_distances': {
            'nypl': 0, 'bklyn': 0, 'albany': 0,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 0,
            'bard': 0, 'ia': 0,
        },
        'lang': 'en',
        'audience': 'general',
        'expected': ['nypl', 'albany', 'ia'],
    },
    # NOTE(review): named "Spanish speaker" but lang is 'en', so this scores
    # exactly like 'In Manhattan'. Looks like it was meant to be 'es' --
    # confirm before changing, since not every library has an 'es' collection.
    'In Manhattan, Spanish speaker': {
        'focus_distances': {
            'nypl': 0, 'bklyn': 10, 'albany': 230,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 3000,
            'bard': 0, 'ia': 0,
        },
        'service_distances': {
            'nypl': 0, 'bklyn': 0, 'albany': 230,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 0,
            'bard': 0, 'ia': 0,
        },
        'lang': 'en',
        'audience': 'general',
        'expected': ['nypl', 'bklyn', 'ia', 'unm'],
    },
    'In Manhattan, with a11y features on': {
        'focus_distances': {
            'nypl': 0, 'bklyn': 10, 'albany': 230,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 3000,
            'bard': 0, 'ia': 0,
        },
        'service_distances': {
            'nypl': 0, 'bklyn': 0, 'albany': 230,
            'nyu_lib': 0, 'nyu_press': 0, 'unm': 0,
            'bard': 0, 'ia': 0,
        },
        'lang': 'en',
        'audience': 'a11y',
        'expected': ['nypl', 'bklyn', 'bard', 'ia', 'nyu_press'],
    },
}
# Tunable weights for compute_score. Each *_factor below except
# audience_factor is used as the `rate` argument of decay().

# Multiplier applied when a non-'general' library matches the scenario audience.
audience_factor = 1.01
# Decay rate applied to (library collection size / largest collection size).
collection_size_factor = 1000
# Decay rate applied to the patron's distance from the library's focus area.
focus_area_distance_factor = 5e-3
# Decay rate applied to the patron's distance from the library's service area.
service_area_distance_factor = 1e-1
# Decay rate applied to the size of the library's focus area.
focus_area_size_factor = 1.2e-8
# A library is reported only when its score is strictly greater than this.
score_threshold = 0
from pdb import set_trace
from math import log, exp
def decay(rate, value):
    """Return exp(-rate * value), an exponential decay weight.

    The result is 1.0 when `rate` or `value` is zero and approaches 0 as
    their (positive) product grows.
    """
    exponent = -1.0 * rate * value
    return exp(exponent)
def compute_score(scenario, library, library_name):
    """Score how well `library` matches the patron described by `scenario`.

    The score starts at 1 and is multiplied by these terms in turn:

    * ``audience_factor``, when the library's audience is not 'general'
      and equals the scenario's audience;
    * ``1 - decay(collection_size_factor, size / largest)``, where `size`
      is the library's collection in the scenario's language and `largest`
      is the biggest collection in that language across ``libraries``;
    * ``decay`` of the scenario's focus-area distance to this library;
    * ``decay`` of the scenario's service-area distance to this library;
    * ``decay`` of the size of the library's focus area.

    Args:
        scenario: dict with 'audience', 'lang', 'focus_distances' and
            'service_distances' entries (the distance dicts are keyed by
            library name).
        library: dict with 'audience', 'collections' and 'focus_area'.
        library_name: key used to look `library` up in the scenario's
            distance dicts.

    Returns:
        0 when the library is ruled out: its non-'general' audience differs
        from the scenario's, or it has no collection in the scenario's
        language. Otherwise the product described above.
    """
    library_audience = library.get('audience')
    score = 1
    if library_audience != 'general':
        # Specialised libraries are only offered to their own audience,
        # and get a small boost when they match.
        if scenario.get('audience') != library_audience:
            return 0
        score = score * audience_factor

    lang = scenario.get('lang')
    # A missing language used to surface as None and blow up the arithmetic
    # below; treat it as an empty collection instead.
    collection_size = (library.get('collections') or {}).get(lang) or 0
    if not collection_size:
        # The collection term would be 1 - decay(rate, 0) == 0 anyway.
        return 0
    all_sizes = [(l.get("collections") or {}).get(lang) or 0
                 for l in libraries.values()]
    # Including this library's own size keeps the divisor positive even if
    # `library` is not registered in `libraries`.
    max_collection_for_lang = max(all_sizes + [collection_size])
    score = score * (1 - decay(collection_size_factor,
                               1.0 * collection_size / max_collection_for_lang))

    focus_area_distance = scenario.get('focus_distances').get(library_name)
    service_area_distance = scenario.get("service_distances").get(library_name)
    score = score * decay(focus_area_distance_factor, focus_area_distance)
    score = score * decay(service_area_distance_factor, service_area_distance)

    focus_area_size = library.get("focus_area")
    score = score * decay(focus_area_size_factor, focus_area_size)
    return score
if __name__ == '__main__':
    # Score every library for every scenario and print the ranking next to
    # the ranking the scenario author expected.
    #
    # dict.items() and single-argument print(...) behave identically on
    # Python 2 and Python 3, so the script runs unchanged on both.
    for scenario_name, scenario in scenarios.items():
        scores = dict()
        for library_name, library in libraries.items():
            score = compute_score(scenario, library, library_name)
            # Drop libraries that were ruled out (score at or below threshold).
            if score > score_threshold:
                scores[library_name] = score
        # Highest score first.
        ranked = sorted(scores.items(), key=lambda x: -x[1])
        print("Result for %s" % scenario_name)
        print(ranked)
        # Two spaces reproduce the output of the original
        # `print expected, " expected"` statement.
        print("%s  expected" % (scenario.get('expected'),))
        print("------------------------------------")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment