Skip to content

Instantly share code, notes, and snippets.

@hornc
Created June 22, 2023 21:09
Show Gist options
  • Save hornc/c62cb8b8178d98ded884ac0eb39fcf93 to your computer and use it in GitHub Desktop.
Save hornc/c62cb8b8178d98ded884ac0eb39fcf93 to your computer and use it in GitHub Desktop.
Generate a Wikipedia cite book template for an archive.org identifier (with optional page number + link)
#!/usr/bin/env python
import argparse
import internetarchive as ia
import biblionames
#from isbn_hyphenate import hyphenate
from isbnlib import mask as hyphenate
# URN prefix identifying an OCLC record number among an item's
# 'external-identifier' metadata values; stripped to get the bare number.
OCLC_TAG = 'urn:oclc:record:'
def listify(data):
    """Normalize a metadata value to a list.

    Falsy input (None, '', [], 0, ...) yields None. A list is returned
    unchanged (same object); any other value is wrapped in a one-element list.
    """
    if not data:
        return None
    if isinstance(data, list):
        return data
    return [data]
def cite(item, page=None):
    """Build a Wikipedia ``{{cite book}}`` template string for an archive.org item.

    Args:
        item: Object exposing ``identifier`` and a ``metadata`` mapping
            (as returned by ``internetarchive.get_item``).
        page: Optional page number or range (e.g. ``'12'`` or ``'12-15'``).
            A hyphen in a range is converted to an en dash, and the page text
            is linked to the first page of the range on archive.org.

    Returns:
        The template as a single string. Parameters whose value is empty or
        None are omitted; the rest appear in the order they are set below.
    """
    params = {}

    # Authors: numbered (last1/first1, ...) only when there is more than one.
    authors = item.metadata.get('creator', [])
    if isinstance(authors, str):
        authors = [authors]
    for i, author in enumerate(authors):
        num = i + 1 if len(authors) > 1 else ''
        first, last = biblionames.Author.ia_to_first_last(author)
        if author and not (first and last):
            # No usable first/last split: emit the whole name as authorN.
            params[f'author{num}'] = biblionames.Author.ia_to_natural(author)
        elif author:
            params[f'last{num}'] = last
            params[f'first{num}'] = first

    params['date'] = item.metadata.get('date')
    title = biblionames.Title.transform_title(item.metadata.get('title'))
    params['title'] = title.replace(' : ', ': ')

    url = f'https://archive.org/details/{item.identifier}'
    params['url'] = url
    if 'inlibrary' in item.metadata.get('collection', []):
        params['url-access'] = 'registration'

    params['publication-place'], params['publisher'] = ia_publisher(
        item.metadata.get('publisher', []))

    # TODO: add volume: e.g. bakersbiographic02slon
    if page:
        # str() lets callers pass an int page number; ranges use an en dash.
        page = str(page).replace('-', '–')
        first_page, dash, _ = page.partition('–')
        ptype = 'pages' if dash else 'page'
        # Link target is the first page of a range (or the single page).
        params[ptype] = f'[{url}/page/{first_page} {page}]'

    isbn = get_isbn(item)
    params['isbn'] = hyphenate(isbn) if isbn else None

    # Use the first LCCN and keep only its first whitespace-separated token;
    # tolerate an empty list or a blank string instead of raising IndexError.
    lccns = listify(item.metadata.get('lccn'))
    lccn_tokens = lccns[0].split() if lccns else []
    params['lccn'] = lccn_tokens[0] if lccn_tokens else None

    # OCLC: prefer the dedicated 'oclc_id' field; otherwise fall back to an
    # 'urn:oclc:record:' entry in 'external-identifier'. (Previously the
    # fallback value was always overwritten by the 'oclc_id' lookup.)
    oclc_ids = listify(item.metadata.get('oclc_id'))
    if oclc_ids:
        params['oclc'] = oclc_ids[0]
    else:
        for id_ in listify(item.metadata.get('external-identifier')) or []:
            if OCLC_TAG in id_:
                params['oclc'] = id_.replace(OCLC_TAG, '')
                break

    return '{{cite book|' + ' |'.join([f'{k}={v}' for k, v in params.items() if v]) + '}}'
def get_isbn(item):
    """Return the best ISBN for *item*, or None when it has none.

    Preference order:
      1. ``scribe3_search_id`` when ``scribe3_search_catalog`` is ``'isbn'``
         and the id is actually present.
      2. The first value of the ``isbn`` metadata field starting with ``'97'``
         (presumably to favour ISBN-13 over ISBN-10).
      3. The first value of the ``isbn`` field.
    """
    metadata = item.metadata
    if metadata.get('scribe3_search_catalog') == 'isbn':
        searched = metadata.get('scribe3_search_id')
        # Only trust the search id when it exists; otherwise fall through to
        # the 'isbn' field rather than returning None.
        if searched:
            return searched

    isbns = listify(metadata.get('isbn')) or []
    for isbn in isbns:
        if isbn.startswith('97'):
            return isbn
    return isbns[0] if isbns else None
def ia_publisher(raw):
    """Split an archive.org publisher string into (location, publisher).

    Handles strings such as ``'New York : Knopf'`` or ``'London, Macmillan'``.
    When several publishers are listed (separated by ``;``, or given as a
    list), only the first is used. The split happens at the FIRST ``:`` if
    there is one, otherwise at the first ``,``; everything after that
    separator is the publisher, so extra separators inside the publisher name
    no longer raise ValueError. This is a heuristic: a location that itself
    contains a comma (and no colon) will be split too early.

    Args:
        raw: Publisher metadata value: a string, a list of strings, or an
            empty/None value.

    Returns:
        A ``(location, publisher)`` tuple. ``location`` is None when it cannot
        be determined; both are None for empty input.
    """
    # Repeated metadata fields arrive as a list; cite only the first entry.
    if isinstance(raw, list):
        raw = raw[0] if raw else None
    if not raw:
        return None, None

    # Multiple publishers: use the first.
    raw = raw.split(';')[0]

    for separator in (':', ','):
        location, found, publisher = raw.partition(separator)
        if found:
            return location.strip() or None, publisher.strip()
    return None, raw.strip()
def main():
    """Command-line entry point: print a cite book template for an item.

    Parses the item identifier and the optional ``-p/--page`` argument,
    fetches the item from archive.org, then prints the item followed by its
    citation. Exits with a usage error if the item has no metadata.
    """
    parser = argparse.ArgumentParser(description='archive.org book cite tool.')
    parser.add_argument('item', help='Book item identifier to cite')
    parser.add_argument('-p', '--page', help='Page number (or range)')
    args = parser.parse_args()

    item = ia.get_item(args.item)
    # An item with empty metadata (e.g. an unknown identifier) has nothing
    # to cite; report it clearly instead of passing it on to cite().
    if not item.metadata:
        parser.error(f'no metadata found for archive.org item {args.item!r}')

    print(item)
    print(cite(item, args.page))
# Run the command-line tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()
@hornc
Copy link
Author

hornc commented Jun 22, 2023

This uses biblionames which is unfortunately not public :(

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment