Last active
April 27, 2021 17:57
-
-
Save BharatKalluri/ac48506b8eef3f0e66e9545518331148 to your computer and use it in GitHub Desktop.
Internet Archive (IA) <-> Open Library (OL) sync
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List, Union | |
from internetarchive import configure, get_session | |
from olclient.openlibrary import OpenLibrary | |
from models import IsbnData | |
from models import EditionMetadata | |
class IsbnNotFoundInOl(Exception):
    """Signals that Open Library returned no edition for the given ISBN data."""

    def __init__(self, message: str):
        # Forward the message unchanged so str(exc) and exc.args carry it.
        Exception.__init__(self, message)
# Module-level clients shared by the functions in this script.
# The Internet Archive session is created first and its HTTP adapter is
# mounted right away, before any function issues a request through it.
ia_session = get_session()
ia_session.mount_http_adapter()

# Open Library client used for the ISBN -> edition lookups.
ol = OpenLibrary()
def get_ia_identifier_list(search_q: str) -> List[str]:
    """Return the identifiers of the Internet Archive items matching a query.

    Args:
        search_q: Query string handed to ``ia_session.search_items``.

    Returns:
        The ``identifier`` value of every search result, in the order the
        search yields them. Results without an identifier are skipped so the
        returned list really contains only strings.
    """
    # ``fields`` is documented as a list of field names. A bare string such as
    # "isbn" would be consumed as a sequence of one-character names instead.
    search_results = ia_session.search_items(search_q, fields=["isbn"])
    # Iterate the search directly; no intermediate copy of the results needed.
    return [
        hit["identifier"] for hit in search_results if hit.get("identifier")
    ]
def _isbn_data_from_values(raw_isbns: List[str]) -> IsbnData:
    """Build IsbnData from the first ISBN-10 and first ISBN-13 in raw values."""
    # Strip hyphens and spaces so values such as "0-306-40615-2" are still
    # recognised by the length checks below.
    cleaned = [
        str(value).replace("-", "").replace(" ", "") for value in raw_isbns
    ]
    return IsbnData(
        isbn_10=next((v for v in cleaned if len(v) == 10), None),
        isbn_13=next((v for v in cleaned if len(v) == 13), None),
    )


def get_isbn_from_ia_id(ia_id: str) -> IsbnData:
    """Read the ISBN(s) stored in an Internet Archive item's metadata.

    The ``isbn`` metadata field may be a single string or a list of strings.
    Values are classified by length after removing hyphens and spaces:
    10 characters is taken as an ISBN-10, 13 characters as an ISBN-13.
    When several values of the same kind exist, the first one wins.

    Args:
        ia_id: Internet Archive item identifier.

    Returns:
        IsbnData whose fields are None when no value of that length exists.

    Raises:
        Exception: If the item has no ``isbn`` field or an empty list, or if
            the field has a type other than str or list.
    """
    metadata = ia_session.get_metadata(ia_id)
    isbn_field: Union[str, list, None] = metadata.get("metadata", {}).get("isbn")

    # A missing field is "no ISBN", not an unexpected type.
    if isbn_field is None:
        raise Exception(f"No ISBN found for {ia_id}")
    if isinstance(isbn_field, str):
        return _isbn_data_from_values([isbn_field])
    if isinstance(isbn_field, list):
        if not isbn_field:
            raise Exception(f"No ISBN found for {ia_id}")
        return _isbn_data_from_values(isbn_field)
    raise Exception(f"unhandled return type, {isbn_field}")
def get_ol_edition_info(isbn_data: IsbnData) -> EditionMetadata:
    """Resolve an ISBN pair to its Open Library edition and work ids.

    The ISBN-13 is looked up first. The ISBN-10 is only queried when the
    ISBN-13 is absent or its lookup returned None.

    Args:
        isbn_data: ISBN-10 / ISBN-13 pair; either value may be empty.

    Returns:
        EditionMetadata built from the edition's ``work_olid`` and ``olid``.

    Raises:
        IsbnNotFoundInOl: If no lookup produced an edition.
    """
    for candidate in (isbn_data.isbn_13, isbn_data.isbn_10):
        if not candidate:
            continue
        edition = ol.Edition.get(isbn=candidate)
        if edition is not None:
            break
    else:
        # Neither ISBN was usable or known to Open Library.
        raise IsbnNotFoundInOl(
            f"Could not find any data for ISBN {isbn_data.json()}"
        )

    return EditionMetadata(
        ol_work_id=edition.work_olid,
        ol_edition_id=edition.olid,
    )
def update_ol_ids_in_ia_metadata_based_on_isbn(ia_id: str):
    """Work out the Open Library ids for an IA item from its ISBN and report them.

    The item's ISBN data is read from its Internet Archive metadata and
    resolved to an Open Library edition. The resulting metadata change is
    currently only printed; the code that writes it back is commented out.

    Args:
        ia_id: Internet Archive item identifier.

    Returns:
        None. Lookup failures are printed and the function returns early.

    Raises:
        Exception: Propagated from ``get_isbn_from_ia_id`` (called outside the
            try block), or raised here when the edition id or work id is None.
    """
    isbn_data = get_isbn_from_ia_id(ia_id)
    try:
        print(f"Retrieving data for IA id: {ia_id}, isbn data: {isbn_data.json()}")
        edition_info: EditionMetadata = get_ol_edition_info(isbn_data=isbn_data)
    except IsbnNotFoundInOl as e:
        print(f"IsbnNotFoundInOl error: {e}")
        return
    except Exception as e:
        # Catch Exception rather than BaseException so KeyboardInterrupt and
        # SystemExit still stop a long-running batch instead of being logged
        # as an "Unknown Error" and skipped.
        print(f"Unknown Error: {e}")
        return

    if edition_info.ol_edition_id is None or edition_info.ol_work_id is None:
        raise Exception(
            f"OLEditionOrWorkIdMissing error: {ia_id}, isbn_data: {isbn_data.json()}"
        )

    metadata_to_modify = {
        "openlibrary_edition": edition_info.ol_edition_id,
        "openlibrary_work": edition_info.ol_work_id,
    }
    print(ia_id, metadata_to_modify)
    # NOTE: dry run only. The write-back to Internet Archive is disabled below.
    # ia_entity: Item = get_item(ia_id)
    # response: Any = ia_entity.modify_metadata(metadata_to_modify)
    # # TODO: remove openlibrary from metadata as well
    # assert (
    #     response.status_code == 200
    # ), f"failed to update metadata for ia_id: {ia_id}"
def open_library_metadata_fix():
    """Process IA text items that have ``openlibrary`` but no ``openlibrary_edition``.

    Searches Internet Archive for text items that carry an ISBN and an
    ``openlibrary`` field but lack ``openlibrary_edition``, then runs the
    ISBN-based Open Library lookup on each one. A failure on one item is
    printed and the loop continues with the next item.
    """
    ia_search_query = (
        "mediatype:texts AND openlibrary:* AND NOT openlibrary_edition:* AND isbn:*"
    )
    for ia_id in get_ia_identifier_list(ia_search_query):
        try:
            update_ol_ids_in_ia_metadata_based_on_isbn(ia_id)
        except Exception as e:
            # Exception, not BaseException: Ctrl-C / SystemExit must be able
            # to abort the batch instead of being printed and ignored.
            print(f"Failed to update metadata, error: {e}")
def add_open_library_identifiers_for_ia_isbn_entities():
    """Process every IA text item that has an ISBN but no ``openlibrary_edition``.

    Searches Internet Archive for matching items and runs the ISBN-based Open
    Library lookup on each one. A failure on one item is printed and the loop
    continues, so a single bad record does not abort the whole run.
    """
    ia_search_query = "mediatype:texts AND NOT openlibrary_edition:* AND isbn:*"
    for ia_id in get_ia_identifier_list(ia_search_query):
        try:
            update_ol_ids_in_ia_metadata_based_on_isbn(ia_id)
        except Exception as e:
            # Same per-item isolation as open_library_metadata_fix; Exception
            # (not BaseException) keeps Ctrl-C / SystemExit effective.
            print(f"Failed to update metadata for {ia_id}, error: {e}")
# Script entry point: only runs the batch when executed directly, not on import.
if __name__ == "__main__":
    add_open_library_identifiers_for_ia_isbn_entities()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Optional | |
from pydantic import BaseModel | |
class IsbnData(BaseModel):
    """ISBN-10 / ISBN-13 pair for one item; either value may be absent."""

    # Explicit ``None`` defaults keep both fields optional on pydantic v2 too,
    # where ``Optional[str]`` without a default is a required field.
    isbn_10: Optional[str] = None
    isbn_13: Optional[str] = None
class EditionMetadata(BaseModel):
    """Open Library identifiers of an edition and of the work it belongs to."""

    # Both ids are required strings.
    ol_work_id: str
    ol_edition_id: str
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment