Last active
June 2, 2016 04:54
-
-
Save derickfay/45d19f138014479d51cefa0b6e10c508 to your computer and use it in GitHub Desktop.
Extract bibliographic information in BibTeX format from a PDF downloaded from jstor.org
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# jstorPDFtoBibTeX
#
# Usage: jstorPDFtoBibTeX file.pdf [file.pdf ...]
#
# For every PDF given on the command line, take the first line of its text
# that contains "Author(s):" (the JSTOR cover-page citation) and print a
# BibTeX @article record built from that line on stdout.
#
# Requires: pdftotext. Optional: tag (a file's tags become Keywords).
# Has about an 85% success rate in the original author's experience.

#######################################
# Print the part of TEXT that follows the last occurrence of MARKER, cut off
# at the first occurrence of STOP. MARKER and STOP are literal strings, not
# patterns. Prints nothing when MARKER does not occur, so a missing field
# stays empty instead of picking up an unrelated fragment of the line.
# Arguments: $1 - TEXT, $2 - MARKER, $3 - STOP
# Outputs:   the extracted value on stdout (no trailing newline)
#######################################
text_after() {
  local text=$1 marker=$2 stop=$3
  case $text in
    *"$marker"*) ;;
    *) return 0 ;;
  esac
  text=${text##*"$marker"}
  text=${text%%"$stop"*}
  printf '%s' "$text"
}

#######################################
# Print the part of TEXT that precedes the first occurrence of the literal
# MARKER. Prints nothing when MARKER does not occur.
# Arguments: $1 - TEXT, $2 - MARKER
# Outputs:   the extracted value on stdout (no trailing newline)
#######################################
text_before() {
  local text=$1 marker=$2
  case $text in
    *"$marker"*) ;;
    *) return 0 ;;
  esac
  text=${text%%"$marker"*}
  printf '%s' "$text"
}

#######################################
# Print the first four-digit number in TEXT that is directly followed by
# "),", which is how the year appears in the citation, e.g. "(Aug., 1985),".
# Arguments: $1 - TEXT
# Outputs:   the year on stdout, or nothing when there is no match
#######################################
first_year() {
  # Kept in a variable so the regex is not affected by quoting rules.
  local year_re='([0-9][0-9][0-9][0-9])\),'
  if [[ $1 =~ $year_re ]]; then
    printf '%s' "${BASH_REMATCH[1]}"
  fi
}

#######################################
# Print one BibTeX @article record for a citation line.
# Arguments: $1 - citation line containing "Author(s):"
#            $2 - value for the Keywords field (optional)
# Outputs:   the record on stdout, followed by one blank line
#######################################
bibtex_entry() {
  local ref=$1 keywords=${2-}

  # The cite key is a placeholder; BibDesk sets up the actual cite keys.
  printf '@article{tempcitekey,\n'
  printf '\tAuthor = {%s},\n' "$(text_after "$ref" 'Author(s): ' ' Source')"
  printf '\tYear = {%s},\n' "$(first_year "$ref")"
  printf '\tTitle = {%s},\n' "$(text_before "$ref" ' Author(s):')"
  printf '\tJournal = {%s},\n' "$(text_after "$ref" 'Source: ' ',')"
  printf '\tVolume = {%s},\n' "$(text_after "$ref" 'Vol. ' ',')"
  printf '\tNumber = {%s},\n' "$(text_after "$ref" 'No. ' ' ')"
  printf '\tPages = {%s},\n' "$(text_after "$ref" 'pp. ' ' ')"
  printf '\tKeywords = {%s},\n' "$keywords"
  # An "Annote = {<filename>}," field could be added here if wanted.
  printf '\tBdsk-Url-1 = {%s}\n' "$(text_after "$ref" 'Stable URL: ' ' ')"
  printf '}\n\n'
}

#######################################
# Convert every PDF named in the arguments and print the records.
# Arguments: PDF file names
# Outputs:   BibTeX records on stdout, diagnostics on stderr
# Returns:   0 normally, 1 when pdftotext is unavailable, 2 without arguments
#######################################
main() {
  local pdftotext_bin filename ref keywords

  if [[ $# -eq 0 ]]; then
    printf 'Usage: %s file.pdf [file.pdf ...]\n' "${0##*/}" >&2
    return 2
  fi

  # Keep the historical install location first, then fall back to PATH.
  if [[ -x /usr/local/bin/pdftotext ]]; then
    pdftotext_bin=/usr/local/bin/pdftotext
  elif ! pdftotext_bin=$(command -v pdftotext); then
    printf '%s: pdftotext not found\n' "${0##*/}" >&2
    return 1
  fi

  for filename in "$@"; do
    # Only the first matching line is used, so fields never contain newlines.
    ref=$("$pdftotext_bin" "$filename" - | grep -m 1 -F 'Author(s):')
    if [[ -z $ref ]]; then
      printf '%s: no "Author(s):" line found in %s, skipping\n' \
        "${0##*/}" "$filename" >&2
      continue
    fi

    # Add any tags on the file to the BibTeX record as keywords.
    keywords=
    if command -v tag >/dev/null 2>&1; then
      keywords=$(tag -N "$filename")
    fi

    bibtex_entry "$ref" "$keywords"
  done
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment