Skip to content

Instantly share code, notes, and snippets.

@BharatKalluri
Last active April 27, 2021 17:57
Show Gist options
  • Save BharatKalluri/ac48506b8eef3f0e66e9545518331148 to your computer and use it in GitHub Desktop.
Save BharatKalluri/ac48506b8eef3f0e66e9545518331148 to your computer and use it in GitHub Desktop.
Internet Archive (IA) <-> Open Library (OL) sync
from typing import List, Union
from internetarchive import configure, get_session
from olclient.openlibrary import OpenLibrary
from models import IsbnData
from models import EditionMetadata
class IsbnNotFoundInOl(Exception):
    """Raised when no Open Library edition is found for an item's ISBNs."""

    def __init__(self, message: str) -> None:
        """Store ``message`` as the exception's only argument."""
        super().__init__(message)
# Shared Internet Archive session used by the lookup functions in this module.
ia_session = get_session()
# NOTE(review): presumably installs the session's HTTP adapter with the
# library's default settings - confirm against the internetarchive docs.
ia_session.mount_http_adapter()
# Shared Open Library client, constructed with no explicit arguments.
ol = OpenLibrary()
def get_ia_identifier_list(search_q: str) -> List[str]:
    """Return the identifiers of the Internet Archive items matching a query.

    Args:
        search_q: Query string handed to ``ia_session.search_items``.

    Returns:
        The ``identifier`` value of every search result, in result order.
    """
    # The library documents ``fields`` as a list of field names, so pass a
    # list rather than a bare string.
    search_results = ia_session.search_items(search_q, fields=["isbn"])
    return [result.get("identifier") for result in search_results]
def get_isbn_from_ia_id(ia_id: str) -> IsbnData:
    """Read an Internet Archive item's ``isbn`` metadata as an ISBN-10/13 pair.

    The ``isbn`` field may be a single string or a list of strings. Hyphens
    and whitespace are removed from each value before its length is checked;
    the first value of length 10 becomes ``isbn_10`` and the first value of
    length 13 becomes ``isbn_13``.

    Args:
        ia_id: Internet Archive item identifier.

    Returns:
        An ``IsbnData`` whose fields are ``None`` when no value of the
        corresponding length is present.

    Raises:
        Exception: If the item has no ``isbn`` field, the field is an empty
            list, or the field is neither a string nor a list.
    """
    metadata = ia_session.get_metadata(ia_id)
    raw_isbn: Union[str, list, None] = metadata.get("metadata", {}).get("isbn")

    if isinstance(raw_isbn, str):
        candidates = [raw_isbn]
    elif isinstance(raw_isbn, list):
        candidates = raw_isbn
    elif raw_isbn is None:
        # A missing field means "no ISBN", not an unexpected type.
        candidates = []
    else:
        raise Exception(f"unhandled return type, {raw_isbn}")

    # A bare string (even an empty one) is still looked at below; only a
    # missing field or an empty list counts as "no ISBN at all".
    if not candidates:
        raise Exception(f"No ISBN found for {ia_id}")

    # ISBNs are often written with hyphens or spaces; strip them so the
    # length checks see only the significant characters. Non-string entries
    # are skipped instead of failing on ``len()``.
    normalized = [
        "".join(candidate.replace("-", "").split())
        for candidate in candidates
        if isinstance(candidate, str)
    ]
    return IsbnData(
        isbn_10=next((isbn for isbn in normalized if len(isbn) == 10), None),
        isbn_13=next((isbn for isbn in normalized if len(isbn) == 13), None),
    )
def get_ol_edition_info(isbn_data: IsbnData) -> EditionMetadata:
    """Resolve the Open Library work and edition ids for a pair of ISBNs.

    The ISBN-13 is looked up first; the ISBN-10 is only tried when the
    ISBN-13 is absent or its lookup returns ``None``.

    Args:
        isbn_data: ISBNs to look up via ``ol.Edition.get``.

    Returns:
        An ``EditionMetadata`` built from the edition's ``work_olid`` and
        ``olid``.

    Raises:
        IsbnNotFoundInOl: If neither ISBN yields an edition.
    """
    edition = None
    for isbn in (isbn_data.isbn_13, isbn_data.isbn_10):
        if not isbn:
            continue
        edition = ol.Edition.get(isbn=isbn)
        if edition is not None:
            break

    if edition is None:
        raise IsbnNotFoundInOl(
            f"Could not find any data for ISBN {isbn_data.json()}"
        )

    return EditionMetadata(
        ol_work_id=edition.work_olid,
        ol_edition_id=edition.olid,
    )
def update_ol_ids_in_ia_metadata_based_on_isbn(ia_id: str) -> None:
    """Work out the Open Library ids for an Internet Archive item via its ISBN.

    The item's ISBNs are read from its metadata and looked up in Open
    Library. The resulting ``openlibrary_edition`` / ``openlibrary_work``
    values are printed; the call that would write them back to the item is
    currently commented out.

    Args:
        ia_id: Internet Archive item identifier.

    Raises:
        Exception: If the ISBN lookup for the item fails, or if the resolved
            edition or work id is ``None``.
    """
    isbn_data = get_isbn_from_ia_id(ia_id)
    try:
        print(f"Retrieving data for IA id: {ia_id}, isbn data: {isbn_data.json()}")
        edition_info: EditionMetadata = get_ol_edition_info(isbn_data=isbn_data)
    except IsbnNotFoundInOl as e:
        print(f"IsbnNotFoundInOl error: {e}")
        return
    except Exception as e:
        # Catch ``Exception`` rather than ``BaseException`` so that
        # KeyboardInterrupt / SystemExit still stop the program.
        print(f"Unknown Error: {e}")
        return

    if edition_info.ol_edition_id is None or edition_info.ol_work_id is None:
        raise Exception(
            f"OLEditionOrWorkIdMissing error: {ia_id}, isbn_data: {isbn_data.json()}"
        )

    metadata_to_modify = {
        "openlibrary_edition": edition_info.ol_edition_id,
        "openlibrary_work": edition_info.ol_work_id,
    }
    print(ia_id, metadata_to_modify)
    # The write-back below is disabled; only the proposed change is printed.
    # ia_entity: Item = get_item(ia_id)
    # response: Any = ia_entity.modify_metadata(metadata_to_modify)
    # # TODO: remove openlibrary from metadata as well
    # assert (
    #     response.status_code == 200
    # ), f"failed to update metadata for ia_id: {ia_id}"
def open_library_metadata_fix() -> None:
    """Process text items that have ``openlibrary`` but no ``openlibrary_edition``.

    Searches the Internet Archive for text items with an ISBN and an
    ``openlibrary`` field but no ``openlibrary_edition`` field, then runs the
    ISBN-based id lookup for each one. A failure on one item is printed and
    the loop moves on to the next item.
    """
    ia_search_query = (
        "mediatype:texts AND openlibrary:* AND NOT openlibrary_edition:* AND isbn:*"
    )
    identifier_list_from_ia = get_ia_identifier_list(ia_search_query)
    for ia_id in identifier_list_from_ia:
        try:
            update_ol_ids_in_ia_metadata_based_on_isbn(ia_id)
        except Exception as e:
            # ``Exception`` (not ``BaseException``) so Ctrl-C can still
            # interrupt a long batch run.
            print(f"Failed to update metadata, error: {e}")
def add_open_library_identifiers_for_ia_isbn_entities() -> None:
    """Process text items that have an ISBN but no ``openlibrary_edition``.

    Searches the Internet Archive for matching items and runs the ISBN-based
    id lookup for each one. A failure on one item is printed and the loop
    moves on, so a single bad item does not abort the whole batch.
    """
    ia_search_query = "mediatype:texts AND NOT openlibrary_edition:* AND isbn:*"
    ia_id_list = get_ia_identifier_list(ia_search_query)
    for ia_id in ia_id_list:
        try:
            update_ol_ids_in_ia_metadata_based_on_isbn(ia_id)
        except Exception as e:
            # Per-item failures (e.g. an item without a usable ISBN) are
            # reported and skipped; KeyboardInterrupt still propagates.
            print(f"Failed to update metadata, error: {e}")
# Script entry point: run the ISBN-based identifier pass when executed directly.
if __name__ == "__main__":
    add_open_library_identifiers_for_ia_isbn_entities()
from typing import Optional
from pydantic import BaseModel
class IsbnData(BaseModel):
    """ISBN-10 / ISBN-13 pair for one item; either value may be absent."""

    # Explicit ``None`` defaults keep both fields optional regardless of the
    # installed pydantic major version (v2 treats a bare ``Optional[str]``
    # annotation as a required field).
    isbn_10: Optional[str] = None
    isbn_13: Optional[str] = None
# Open Library identifiers resolved for a single edition.
class EditionMetadata(BaseModel):
    # Identifier of the Open Library work the edition belongs to.
    ol_work_id: str
    # Identifier of the Open Library edition itself.
    ol_edition_id: str
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment