Last active
November 24, 2017 08:37
-
-
Save hornc/32a1924a96e4183308a01ea20c72b536 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from catharbot import catharbot | |
bot = catharbot.CatharBot() | |
# Work In Progress
# Uses the 'merging' branch of Catharbot: https://github.com/hornc/catharbot/tree/merging
def extract_olid(olid):
    """Return the last '/'-separated segment of an Open Library key.

    Example: '/authors/OL1412764A' -> 'OL1412764A'.
    A string containing no '/' is returned unchanged.
    """
    _, _, tail = olid.rpartition('/')
    return tail
def remove_editions(duplicates, docs):
    """Filter duplicate editions out of a merge changeset.

    duplicates: edition OLIDs to drop ([OLID str])
    docs: changeset documents ([JSON dicts]), each carrying a 'key' entry

    Returns a new list with every doc whose OLID (the last path segment of
    its 'key') is not listed in duplicates; the order of docs is kept.

    reason: to prevent duplicated edition documents that merge_works()
    reassigns and merge_editions() makes into redirects.
    TODO: refactor to make this process clearer / cleaner
    """
    kept = []
    for doc in docs:
        if extract_olid(doc['key']) in duplicates:
            # this edition is going to become a redirect; leave it out
            continue
        kept.append(doc)
    return kept
def test_remove_editions():
    """remove_editions() keeps only the docs whose OLID is not a duplicate."""
    survivor = {'key': '/books/B'}
    changeset = [{'key': '/books/A'}, survivor, {'key': '/books/C'}]
    remaining = remove_editions(['A', 'C'], changeset)
    assert remaining == [{'key': '/books/B'}]
# TODO: change merge_works() and merge_editions() to use **kwargs
#   e.g. merge_works(master="OL1234W", duplicates=["OL345W", "OL678W"])
# Example changeset: https://openlibrary.org/recentchanges/2017/09/22/bulk_update/53034676
# Example call: full_merge(master='OL24869802M', duplicates=['OL25426847M', 'OL24928157M'])
def full_merge(**kwargs):
    """ Merge identical editions and their works

        kwargs:
          master: Master edition OLID (str - required)
         and one of
          duplicate: Duplicate edition OLID (str)
          duplicates: list of edition OLIDs ([str])

          simple: (bool) Do not merge data, just perform redirects, defaults to False
                  Simple merge is faster and can be used when merging 'bad' data into a good record.

        If both 'duplicates' and 'duplicate' are given, the single duplicate
        is processed after the listed ones. The caller's 'duplicates' list is
        never modified.

        Returns the changeset (list) accumulated from the bot's merge helpers.
        Raises KeyError if 'master' is not supplied.
    """
    # TODO: this works, but refactor for clarity!
    master = kwargs['master']
    simple = kwargs.get('simple', False)
    # Work on a copy: appending 'duplicate' below must not alter the list
    # object the caller passed in.
    duplicates = list(kwargs.get('duplicates') or [])
    if 'duplicate' in kwargs:
        duplicates.append(kwargs['duplicate'])

    # Single-argument call form: prints the same text on Python 2 and 3.
    print("Merge %s into %s" % (duplicates, master))

    master_edition = bot.load_doc(master)
    dupe_editions = [bot.load_doc(e) for e in duplicates]

    changeset = []
    if not simple:
        merged_edition = bot.merge_into_work(master_edition, dupe_editions)
        changeset.append(merged_edition)

    # are there extra works to merge?
    master_w_olid = extract_olid(master_edition['works'][0]['key'])
    dupe_w_olids = []
    for edition in dupe_editions:
        w_olid = extract_olid(edition['works'][0]['key'])
        # Skip the master's own work, and list every other work only once
        # even when several duplicate editions belong to it.
        if w_olid != master_w_olid and w_olid not in dupe_w_olids:
            dupe_w_olids.append(w_olid)

    # The list above never contains master_w_olid, so "is there anything to
    # merge" is simply whether the list is non-empty.
    if dupe_w_olids:
        master_work = bot.load_doc(master_w_olid)
        dupe_works = [bot.load_doc(w) for w in dupe_w_olids]
        if not simple:
            merged_work = bot.merge_into_work(master_work, dupe_works)
            changeset.append(merged_work)
        changeset += bot.merge_works(dupe_w_olids, master_w_olid)
        # remove reassigned duplicate editions from changeset that will be made into redirects
        changeset = remove_editions(duplicates, changeset)

    changeset += bot.merge_editions(duplicates, master)
    return changeset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage: move_editions(['OL559079M'], 'OL2420021W')
from olclient.openlibrary import OpenLibrary | |
ol = OpenLibrary() | |
def move_editions(edition_list, master_work):
    """Point every edition in edition_list at the work master_work and save them.

    edition_list: edition OLIDs, e.g. ['OL559079M']
    master_work: OLID of the destination work, e.g. 'OL2420021W'

    Each edition is fetched with ol.Edition.get(), its work_olid is set to
    master_work, and the whole batch is handed to ol.save_many() in a single
    call with the comment "move to work <master_work>".
    """
    def reassigned(olid):
        # fetch one edition and attach it to the destination work
        moved = ol.Edition.get(olid)
        moved.work_olid = master_work
        return moved

    batch = [reassigned(olid) for olid in edition_list]
    ol.save_many(batch, "move to work %s" % master_work)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment