-
-
Save omimo/5c97cf23b50824a4b2b243abd6f5d250 to your computer and use it in GitHub Desktop.
arxiv2kindle: recompiles an arxiv paper for kindle-sized screens, and sends it to your wifi-enabled kindle
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"import lxml.html as html\n", | |
"import re\n", | |
"import urllib\n", | |
"import os, sys, subprocess, os.path\n", | |
"import glob\n", | |
"import IPython.display\n", | |
"import getpass\n", | |
"import tempfile" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Change the following:\n", | |
"The query can be an arxiv URL or any string containing an arxiv ID.\n", | |
"\n", | |
"It will prompt you for the Gmail account's password. Note that the account's security settings must \"allow less secure apps\" so that this notebook is permitted to log in to the Gmail SMTP server over TLS." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"query = \"http://arxiv.org/abs/1511.08228\"\n", | |
"kindle_email = '[email protected]'\n", | |
"your_gmail = '[email protected]'\n", | |
"gmailpass = getpass.getpass()\n", | |
"\n", | |
"# paper settings (decrease width/height to increase font)\n", | |
"landscape = True\n", | |
"width = \"6in\"\n", | |
"height = \"4in\"\n", | |
"margin = \"0.2in\"\n", | |
"# settings for latex geometry package:\n", | |
"if landscape:\n", | |
" geom_settings = dict(paperwidth=width, paperheight=height, margin=margin)\n", | |
"else:\n", | |
" geom_settings = dict(paperwidth=height, paperheight=width, margin=margin)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"----------" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"arxiv_id = re.match(r'(http://.*?/)?(?P<id>\\d{4}\\.\\d{4,5}(v\\d{1,2})?)', query).group('id')\n", | |
"arxiv_abs = 'http://arxiv.org/abs/' + arxiv_id\n", | |
"arxiv_pdf = 'http://arxiv.org/pdf/' + arxiv_id\n", | |
"arxiv_pgtitle = html.fromstring(requests.get(arxiv_abs).text.encode('utf8')).xpath('/html/head/title/text()')[0]\n", | |
"arxiv_title = re.sub(r'\\s+', ' ', re.sub(r'^\\[[^]]+\\]\\s*', '', arxiv_pgtitle), re.DOTALL)\n", | |
"arxiv_title_scrubbed = re.sub('[^-_A-Za-z0-9]+', '_', arxiv_title, re.DOTALL)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"IPython.display.HTML('''\n", | |
"<h2><a href=\"{abs}\">[{id}] {title}</a><br />\n", | |
"[<a href=\"{pdf}\">pdf</a>]</h2>\n", | |
"'''.format(id=arxiv_id, abs=arxiv_abs, pdf=arxiv_pdf, title=arxiv_title))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"---------------" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"d = tempfile.mkdtemp(prefix='arxiv2kindle_')\n", | |
"\n", | |
"url = 'http://arxiv.org/e-print/' + arxiv_id\n", | |
"!wget -O {os.path.join(d, 'src.tar.gz')} --user-agent=\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0\" {url}\n", | |
"\n", | |
"os.chdir(d)\n", | |
"!tar xvf src.tar.gz" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"texfiles = glob.glob(os.path.join(d, '*.tex'))\n", | |
"for texfile in texfiles:\n", | |
" with open(texfile, 'r') as f:\n", | |
" src = f.readlines()\n", | |
" if 'documentclass' in src[0]:\n", | |
" print('correct file: ' + texfile)\n", | |
" break" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# filter comments/newlines for easier debugging:\n", | |
"src = [line for line in src if line[0] != '%' and len(line.strip()) > 0]\n", | |
"\n", | |
"# strip font size, column stuff, and paper size stuff in documentclass line:\n", | |
"src[0] = re.sub(r'\\b\\d+pt\\b', '', src[0])\n", | |
"src[0] = re.sub(r'\\b\\w+column\\b', '', src[0])\n", | |
"src[0] = re.sub(r'\\b\\w+paper\\b', '', src[0])\n", | |
"src[0] = re.sub(r'(?<=\\[),', '', src[0]) # remove extraneous starting commas\n", | |
"src[0] = re.sub(r',(?=[\\],])', '', src[0]) # remove extraneous middle/ending commas\n", | |
"\n", | |
"# find begin{document}:\n", | |
"begindocs = [i for i, line in enumerate(src) if line.startswith(r'\\begin{document}')]\n", | |
"assert(len(begindocs) == 1)\n", | |
"src.insert(begindocs[0], '\\\\usepackage['+','.join(k+'='+v for k,v in geom_settings.items())+']{geometry}\\n')\n", | |
"src.insert(begindocs[0], '\\\\usepackage{times}\\n')\n", | |
"src.insert(begindocs[0], '\\\\pagestyle{empty}\\n')\n", | |
"if landscape:\n", | |
" src.insert(begindocs[0], '\\\\usepackage{pdflscape}\\n')\n", | |
"\n", | |
"# shrink figures to be at most the size of the page:\n", | |
"for i in range(len(src)):\n", | |
" line = src[i]\n", | |
" m = re.search(r'\\\\includegraphics\\[width=([.\\d]+)\\\\(line|text)width\\]', line)\n", | |
" if m:\n", | |
" mul = m.group(1)\n", | |
" src[i] = re.sub(r'\\\\includegraphics\\[width=([.\\d]+)\\\\(line|text)width\\]',\n", | |
" r'\\\\includegraphics[width={mul}\\\\textwidth,height={mul}\\\\textheight,keepaspectratio]'.format(mul=mul),\n", | |
" line)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"os.rename(texfile, texfile+'.bak')\n", | |
"with open(texfile, 'w') as f:\n", | |
" f.writelines(src)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"texout = !pdflatex {texfile} && pdflatex {texfile} && pdflatex {texfile}\n", | |
"texout[-8:]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"------" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"pdffilename = texfile[:-4] + '.pdf'\n", | |
"if sys.platform == 'darwin':\n", | |
" os.system('open ' + pdffilename)\n", | |
"else:\n", | |
" os.system('xdg-open ' + pdffilename)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"-------" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from email.mime.application import MIMEApplication\n", | |
"from email.mime.multipart import MIMEMultipart\n", | |
"msg = MIMEMultipart()\n", | |
"pdf_part = MIMEApplication(open(texfile[:-4]+'.pdf', 'rb').read(), _subtype='pdf')\n", | |
"pdf_part.add_header('Content-Disposition', 'attachment', filename=arxiv_id+\"_\"+arxiv_title_scrubbed+\".pdf\")\n", | |
"msg.attach(pdf_part)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import smtplib\n", | |
"import getpass\n", | |
"server = smtplib.SMTP('smtp.gmail.com:587') \n", | |
"server.starttls() \n", | |
"server.login(your_gmail, gmailpass)\n", | |
"server.sendmail(your_gmail, kindle_email, msg.as_string())\n", | |
"server.close()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"------------" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment