Created
August 9, 2010 19:49
-
-
Save apetresc/515978 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
## arXiv script version 0.2 | |
## Copyright 2008 Tom Brown | |
## This program is free software; you can redistribute it and/or | |
## modify it under the terms of the GNU General Public License as | |
## published by the Free Software Foundation; either version 3 of the | |
## License, or (at your option) any later version. | |
## This program is distributed in the hope that it will be useful, | |
## but WITHOUT ANY WARRANTY; without even the implied warranty of | |
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
## GNU General Public License for more details. | |
## You should have received a copy of the GNU General Public License | |
## along with this program. If not, see <http://www.gnu.org/licenses/>. | |
## See http://www.stringwiki.org/wiki/ArXiv_script for more usage | |
## instructions | |
'''arXiv script | |
Usage: | |
python arxiv.py reference [ -htabcjdps ] [ --help ] | |
"reference" must be a standard arXiv reference, e.g. hep-th/9711200, 0705.0303. | |
Options: | |
-h, --help | |
displays this help message | |
-t | |
displays the title | |
-a | |
displays the author(s) | |
-b | |
displays the aBstract | |
-c | |
displays the comments | |
-j | |
displays the journal reference | |
-d | |
downloads the PDF | |
-p | |
downloads the PS | |
-s | |
downloads the source file | |
''' | |
__version__ = "0.2" | |
__author__ = "Tom Brown" | |
__copyright__ = "Copyright 2008 Tom Brown, GNU GPL 3" | |
import sys, os, getopt, re, urllib,gzip | |
def findRefType(ref): | |
ref = ref.replace('arxiv:','') | |
if re.search(r'^[a-zA-Z\-]+/\d{7}$',ref): | |
type = 'old-style eprint' | |
elif re.search(r'^\d{7}$',ref): | |
type = 'old-style eprint' | |
ref = 'hep-th/' + ref | |
elif re.search('^\d{4}\.\d{4}$',ref): | |
type = 'new-style eprint' | |
else: | |
type = 'not arXiv' | |
return type, ref | |
def downloadPDF(ref,type,downloadPath): | |
downloadPath = os.path.expanduser(downloadPath) | |
if type == 'old-style eprint': | |
urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref.replace('/','-') + '.pdf') | |
elif type == 'new-style eprint': | |
urllib.urlretrieve('http://arxiv.org/pdf/' + ref, downloadPath + ref + '.pdf') | |
def downloadPS(ref,type,downloadPath): | |
downloadPath = os.path.expanduser(downloadPath) | |
filename = downloadPath + ref.replace('/','-') | |
urllib.urlretrieve('http://arxiv.org/ps/' + ref, filename) | |
gzipFile = gzip.GzipFile(filename) | |
psFile = open(filename + ".ps","w") | |
psFile.write(gzipFile.read()) | |
psFile.close() | |
gzipFile.close() | |
os.remove(filename) | |
def downloadSource(ref,type,downloadPath): | |
downloadPath = os.path.expanduser(downloadPath) | |
filename = downloadPath + ref.replace('/','-') | |
urllib.urlretrieve('http://arxiv.org/e-print/' + ref, filename + ".dum") | |
gzipFile = gzip.GzipFile(filename + ".dum") | |
sourceFile = open(filename,"w") | |
sourceFile.write(gzipFile.read()) | |
sourceFile.close() | |
gzipFile.close() | |
os.remove(filename + ".dum") | |
def getTitle(html): | |
title = html[html.find(">Title:</span>")+15:] | |
title = title[:title.find("</h1>")] | |
return title | |
def getAuthors(html): | |
authors = html[html.find(">Authors:</span>"):] | |
authors = authors[authors.find("\">")+2:] | |
authors = authors[:authors.find("</div>")] | |
authors = re.sub('<[^>]*>','',authors) | |
authors = authors.replace("\n","") | |
return authors | |
def getAbstract(html): | |
abstract = html[html.find("Abstract:</span>")+17:] | |
abstract = abstract[:abstract.find("</blockquote>")-1] | |
return abstract | |
def getComments(html): | |
if "comments" not in html: | |
return "no comments" | |
else: | |
comments = html[html.find("comments\">")+10:] | |
comments = comments[:comments.find("</td>")] | |
return comments | |
def getJref(html): | |
if "jref" not in html: | |
return "no journal reference" | |
else: | |
jref = html[html.find("jref\">")+6:] | |
jref = jref[:jref.find("</td>")] | |
return jref | |
if __name__ == "__main__": | |
authorOpt = 0 | |
titleOpt = 0 | |
abstractOpt = 0 | |
commentsOpt = 0 | |
jrefOpt = 0 | |
pdfOpt = 0 | |
psOpt = 0 | |
sourceOpt = 0 | |
try: | |
options, arguments = getopt.gnu_getopt(sys.argv[1:], | |
'hatbcjdpsv', ['help']) | |
except getopt.error: | |
print 'error: you tried to use an unknown option or the argument for an option that requires it was missing; try \'arxiv.py -h\' for more information' | |
sys.exit(0) | |
for o,a in options: | |
if o in ('-h','--help'): | |
print __doc__ | |
sys.exit(0) | |
elif o == '-a': | |
authorOpt = 1 | |
elif o == '-t': | |
titleOpt = 1 | |
elif o == '-b': | |
abstractOpt = 1 | |
elif o == '-c': | |
commentsOpt = 1 | |
elif o == '-j': | |
jrefOpt = 1 | |
elif o == '-d': | |
pdfOpt = 1 | |
elif o == '-p': | |
psOpt = 1 | |
elif o == '-s': | |
sourceOpt = 1 | |
if len(options) == 0: | |
authorOpt = 1 | |
titleOpt = 1 | |
abstractOpt = 1 | |
commentsOpt = 1 | |
jrefOpt = 1 | |
if len(arguments) != 1: | |
print 'you didn\'t specify an arXiv reference; try \'arxiv.py -h\' for more information' | |
sys.exit(0) | |
else: | |
ref=arguments[0] | |
type, ref = findRefType(ref) | |
if type=="not arXiv": | |
print "type not of arXiv form" | |
sys.exit(0) | |
if authorOpt+titleOpt+abstractOpt+commentsOpt+jrefOpt > 0: | |
htmlObject = urllib.urlopen('http://arxiv.org/abs/' + ref) | |
html = htmlObject.read() | |
if titleOpt: | |
title = getTitle(html) | |
print title | |
if authorOpt: | |
authors = getAuthors(html) | |
print authors | |
if abstractOpt: | |
abstract = getAbstract(html) | |
print abstract | |
if commentsOpt: | |
comments = getComments(html) | |
print comments | |
if jrefOpt: | |
jref = getJref(html) | |
print jref | |
if pdfOpt: | |
downloadPDF(ref,type,"") | |
if psOpt: | |
downloadPS(ref,type,"") | |
if sourceOpt: | |
downloadSource(ref,type,"") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Just in case the original source goes down: http://www.stringwiki.org/wiki/ArXiv_script