Created
June 22, 2023 21:09
-
-
Save hornc/c62cb8b8178d98ded884ac0eb39fcf93 to your computer and use it in GitHub Desktop.
Generate a Wikipedia cite book template for an archive.org identifier (with optional page number + link)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import argparse | |
import internetarchive as ia | |
import biblionames | |
#from isbn_hyphenate import hyphenate | |
from isbnlib import mask as hyphenate | |
# URN prefix that marks an OCLC record number inside an
# 'external-identifier' metadata value.
OCLC_TAG = 'urn:oclc:record:'
def listify(data):
    """Wrap a metadata value in a list unless it already is one.

    Returns None (not an empty list) for any falsy input, the list itself
    when *data* is a list, and a one-element list for anything else.
    """
    if data:
        if isinstance(data, list):
            return data
        return [data]
    return None
def cite(item, page=None):
    """Build a ``{{cite book}}`` template string for an archive.org item.

    Args:
        item: Object exposing ``identifier`` and a ``metadata`` mapping
            (``main`` passes the result of ``ia.get_item``).
        page: Optional page number or range as a string, e.g. ``'12'`` or
            ``'12-15'``. A hyphen is converted to an en dash, and the page
            text is linked to the first page of the range.

    Returns:
        The template as a single string. Parameters whose value is empty
        or missing are left out.
    """
    params = {}

    # 'creator' may be a single string or a list of strings.
    authors = item.metadata.get('creator', [])
    if isinstance(authors, str):
        authors = [authors]
    for i, author in enumerate(authors):
        # Parameters are only numbered when there is more than one author.
        num = i + 1 if len(authors) > 1 else ''
        first, last = biblionames.Author.ia_to_first_last(author)
        if not author:
            continue
        if first and last:
            params[f'last{num}'] = last
            params[f'first{num}'] = first
        else:
            # No usable first/last split: emit one combined author field.
            params[f'author{num}'] = biblionames.Author.ia_to_natural(author)

    params['date'] = item.metadata.get('date')
    params['title'] = biblionames.Title.transform_title(item.metadata.get('title')).replace(' : ', ': ')

    url = f'https://archive.org/details/{item.identifier}'
    params['url'] = url
    if 'inlibrary' in item.metadata.get('collection', []):
        params['url-access'] = 'registration'

    params['publication-place'], params['publisher'] = ia_publisher(item.metadata.get('publisher', []))

    # TODO: add volume: e.g. bakersbiographic02slon
    if page:
        if '-' in page:
            page = page.replace('-', '–')
        if '–' in page:
            ptype = 'pages'
            # Link a range to its first page.
            pagelink = f"{url}/page/{page.split('–', 1)[0]}"
        else:
            ptype = 'page'
            pagelink = f'{url}/page/{page}'
        params[ptype] = f'[{pagelink} {page}]'

    isbn = get_isbn(item)
    params['isbn'] = hyphenate(isbn) if isbn else None

    lccn = item.metadata.get('lccn')
    if isinstance(lccn, list):
        lccn = lccn[0]
    # Keep only the first whitespace-separated token of the LCCN value.
    params['lccn'] = lccn.split()[0] if lccn else None

    # OCLC number: prefer the dedicated 'oclc_id' field and fall back to an
    # 'external-identifier' URN. Previously the URN value was computed and
    # then unconditionally overwritten, so it never reached the output.
    oclc_ids = listify(item.metadata.get('oclc_id'))
    if oclc_ids:
        params['oclc'] = oclc_ids[0]
    else:
        identifiers = item.metadata.get('external-identifier', [])
        if isinstance(identifiers, str):
            identifiers = [identifiers]
        for id_ in identifiers:
            if OCLC_TAG in id_:
                params['oclc'] = id_.replace(OCLC_TAG, '')
                break

    return '{{cite book|' + ' |'.join([f'{k}={v}' for k, v in params.items() if v]) + '}}'
def get_isbn(item):
    """Return the preferred ISBN for *item*, or None if it has none.

    Order of preference:
      1. ``scribe3_search_id`` when ``scribe3_search_catalog`` is ``'isbn'``.
      2. The first ISBN-13 in the ``isbn`` metadata field (13 digits once
         hyphens and spaces are removed, starting with 978 or 979).
      3. The first value of the ``isbn`` field.

    Args:
        item: Object exposing a ``metadata`` mapping.
    """
    if item.metadata.get('scribe3_search_catalog') == 'isbn':
        return item.metadata.get('scribe3_search_id')

    isbns = listify(item.metadata.get('isbn'))
    if not isbns:
        return None

    for isbn in isbns:
        # A bare '97' prefix test also matches ISBN-10s from registration
        # groups 970-979, so check the length and the full prefix.
        digits = isbn.replace('-', '').replace(' ', '')
        if len(digits) == 13 and digits.startswith(('978', '979')):
            return isbn
    return isbns[0]
def ia_publisher(raw):
    """Split a publisher metadata value into ``(location, publisher)``.

    Accepted forms are ``'Place : Publisher'`` and ``'Place, Publisher'``,
    with the colon form checked first. When several publishers are joined
    with ``;`` only the first is used. Splitting happens on the first
    separator only, so extra colons or commas stay in the publisher part
    instead of raising ``ValueError``.

    Args:
        raw: The publisher value as a string, a list of such strings (the
            first entry is used), or an empty value or None.

    Returns:
        A ``(location, publisher)`` tuple of stripped strings. Either
        element is None when that part is missing or empty.
    """
    # cite() passes [] when the field is missing, and other metadata
    # fields in this file arrive as either a string or a list.
    if isinstance(raw, (list, tuple)):
        raw = raw[0] if raw else None
    if not raw:
        return None, None

    raw = raw.split(';', 1)[0]  # Multiple publishers: use the first

    for separator in (':', ','):
        location, found, publisher = raw.partition(separator)
        if found:
            return location.strip() or None, publisher.strip() or None
    return None, raw.strip() or None
def main():
    """Command-line entry point.

    Reads an item identifier and an optional page (or page range) from the
    command line, fetches the item with ``ia.get_item``, then prints the
    item followed by its generated citation.
    """
    cli = argparse.ArgumentParser(description='archive.org book cite tool.')
    cli.add_argument('item', help='Book item identifier to cite')
    cli.add_argument('-p', '--page', help='Page number (or range)')
    options = cli.parse_args()

    book = ia.get_item(options.item)
    print(book)
    citation = cite(book, options.page)
    print(citation)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This uses
biblionames
which is unfortunately not public :(