Last active
September 21, 2017 12:21
-
-
Save hornc/ee7914be72534d9d39025f0701b495e8 to your computer and use it in GitHub Desktop.
Example semi-automated bot merge
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Steps to merge a number of Kite Runner editions and works:
from catharbot import catharbot

# Bot instance that every load/save/merge call below goes through.
bot = catharbot.CatharBot()
def merge_unique_lists(lists, hash_fn=None):
    """
    Combine unique lists into a new unique list. Preserves ordering.

    :param lists: iterable of lists to merge, earliest list wins on ordering.
        A ``None`` entry is treated as an empty list.
    :param hash_fn: optional callable mapping an element to the hashable key
        used to decide whether it was already seen. When omitted (or falsy)
        the element itself is the key, so elements must then be hashable.
    :return: a new list holding the first occurrence of every distinct element.
    """
    # Resolve the key function once instead of re-testing hash_fn per element.
    key_of = hash_fn if hash_fn else (lambda el: el)
    result = []
    seen = set()
    for lst in lists:
        for el in lst or ():
            hsh = key_of(el)
            if hsh not in seen:
                result.append(el)
                seen.add(hsh)
    return result
# Editions (…M ids) to attach to the master work.
orphan_olids = ['OL24057181M', 'OL24237802M']
master_olid = 'OL5782001W'  # Chosen as master because: Most Editions, On the most Lists, on Staff Picks List
# Duplicate works (…W ids) to be merged into the master.
dupe_olids = [
    'OL5781992W',
    'OL17174243W',
    'OL16068777W',
    'OL17348501W',
    'OL16049475W',
]

# Move orphans to master work
orphans = [bot.get_move_edition(e, master_olid) for e in orphan_olids]
bot.save_many(orphans, "Associate with work")

master = bot.load_doc(master_olid)
edition_with_desc = bot.load_doc(orphan_olids[1])  # OL24237802M has a good description, use that on the master work
master['description'] = edition_with_desc['description']
master['title'] = "The Kite Runner"  # Title case the master work

# Update lists on master from all duplicates...
# Master values come first in each merge, so their existing order is kept
# and only values new to the master are appended.
for w in dupe_olids:
    dupe = bot.load_doc(w)
    for prop in ['subjects', 'covers', 'subject_places']:
        master[prop] = merge_unique_lists([master.get(prop, []), dupe.get(prop, [])])

# Save the enriched master once, then merge the duplicate works into it.
bot.save_one(master, "update master before merge")
bot.merge_works(dupe_olids, master_olid)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# These are the commands I used to merge
# https://openlibrary.org/works/OL7920112W?v=5 (dupe)
# and
# https://openlibrary.org/works/OL483502W/House_of_Sand_and_Fog?v=7 (master, because it was on the most lists. Lists don't like redirects)
# Uses my bot code, https://github.com/hornc/catharbot
# which requires the openlibrary-client, https://github.com/internetarchive/openlibrary-client/
#
from catharbot import catharbot

# Bot instance that every load/save/merge call below goes through.
bot = catharbot.CatharBot()
def merge_subjects(a, b):
    """
    Return the combined 'subjects' of two documents without duplicates.

    Order is preserved: every subject of ``a`` first, followed by the
    subjects of ``b`` that ``a`` does not already have. (A plain set union
    would return them in arbitrary order.) A document with a missing or
    empty 'subjects' entry contributes nothing.

    :param a: mapping whose 'subjects' take precedence (e.g. the master work).
    :param b: mapping whose extra 'subjects' are appended.
    :return: new list of unique subjects.
    """
    merged = []
    seen = set()
    for doc in (a, b):
        for subject in doc.get('subjects') or []:
            if subject not in seen:
                seen.add(subject)
                merged.append(subject)
    return merged
def merge_covers(a, b):
    """
    Return the combined 'covers' of two documents without duplicates.

    Order is preserved: every cover of ``a`` first, followed by the covers
    of ``b`` that ``a`` does not already have, so the first cover of ``a``
    stays first. (A plain set union would return them in arbitrary order.)
    A document with a missing or empty 'covers' entry contributes nothing.

    :param a: mapping whose 'covers' take precedence (e.g. the master work).
    :param b: mapping whose extra 'covers' are appended.
    :return: new list of unique covers.
    """
    merged = []
    seen = set()
    for doc in (a, b):
        for cover in doc.get('covers') or []:
            if cover not in seen:
                seen.add(cover)
                merged.append(cover)
    return merged
master_olid = "OL483502W"
dupe_olid = "OL7920112W"

master = bot.load_doc(master_olid)
dupe = bot.load_doc(dupe_olid)

# Copy the better metadata from the dupe onto the master before merging.
master['title'] = dupe['title']  # The dupe title happened to be in proper title case
master['description'] = dupe['description']  # dupe had a description, master didn't
master['subjects'] = merge_subjects(master, dupe)  # dupe only had one subject that the master already had, so no-op
master['covers'] = merge_covers(master, dupe)  # dupe had one cover not on the master

bot.save_one(master, "update master before merge")
bot.merge_works([dupe_olid], master_olid)  # This moves the dupe editions and makes the dupe work a redirect
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment