Last active
April 3, 2022 00:58
-
-
Save jakebox/a882bc695776854df065ed5a5db2520e to your computer and use it in GitHub Desktop.
PDF Downloader/Reader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### | |
### PDF Parser/Downloader | |
### By Jake B - December 2019 | |
### | |
import os | |
import requests | |
import PyPDF2 | |
import re | |
# Downloads the menu and saves it as a PDF
url = 'URL OF PDF GOES HERE'
filename = "lunch.pdf"
# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get(url, allow_redirects=True, timeout=30)
# Stop on an HTTP error instead of saving an error page under the PDF name.
r.raise_for_status()
# The context manager flushes and closes the file before it is reopened below.
with open(filename, 'wb') as pdf_file:
    pdf_file.write(r.content)

# Loading the file and grabbing the first page into a string
# The text is extracted while the file is still open, because the reader
# works from the open stream; the handle is closed as soon as we are done.
# NOTE(review): PdfFileReader/getPage/extractText are the legacy PyPDF2 names;
# newer releases use PdfReader/pages/extract_text -- confirm the installed version.
with open(filename, 'rb') as lunchList:
    read_pdf = PyPDF2.PdfFileReader(lunchList)
    page = read_pdf.getPage(0)
    page_content = page.extractText()

# Prints the PDF's content
print(page_content)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment