I hereby claim:
- I am christopherkullenberg on github.
- I am christopherk (https://keybase.io/christopherk) on keybase.
- I have a public key ASDboeD8TeJEQ73WqCMqh2GOG5qZyAilttRdIbkO0hQgEQo
To claim this, I am signing this object:
def pdfparser(fn):
    """Run ``upload/pdf2txt.py`` on an uploaded PDF to produce a text file.

    The command executed is ``python3 upload/pdf2txt.py -o <stem>.txt <path>``,
    where ``<path>`` is ``"upload/" + fn`` and ``<stem>`` is ``<path>`` cut at
    its first ``.``. Change the ``upload/`` directory to suit your setup.

    NOTE(review): the original note said pdftotext must be installed on the
    system, but the command invokes pdf2txt.py -- confirm which tool is
    actually required.

    Args:
        fn: Name of the PDF file inside the ``upload/`` directory.
    """
    import subprocess

    f = "upload/" + fn  # fn is the filename. upload/ is a directory, pls change.
    # Everything after the first '.' in the path is dropped, so "a.b.pdf" is
    # written to "upload/a.txt". Naming is kept exactly as before so existing
    # callers still find the output file.
    target = f.split('.')[0] + '.txt'
    # Pass an argument list instead of a shell string: filenames containing
    # spaces or shell metacharacters are handed to the script verbatim rather
    # than being split or interpreted by a shell.
    cmd = ['python3', 'upload/pdf2txt.py', '-o', target, f]
    run = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Both streams are captured but not reported; inspect ``err`` here to
    # display errors if they occur.
    out, err = run.communicate()
from subprocess import Popen, PIPE, STDOUT | |
from nltk.tokenize import sent_tokenize #make sure to install the full corpus. | |
import re | |
aFile = '/home/christopher/Desktop/Introduction to Computation and Programming Using Python, Revised - Guttag, John V..pdf' | |
def pdftoText(filename): | |
''' | |
Input: a PDF file | |
Output: output of pdftotext. |
''' | |
Input: file with LaTeX links, ex. "\href{http://example.com}{example}" | |
Output: file with markdown links, ex. [example](http://example.com) | |
''' | |
import re | |
outfile = open('filewithmarkdownlinks.md', 'w') #make up a new filename | |
texfile = open('filewithlatexlinks.md', encoding='utf-8') # the file with latex links in it | |
textext = texfile.readlines() |
I hereby claim:
To claim this, I am signing this object: