Skip to content

Instantly share code, notes, and snippets.

@baffioso
Created February 3, 2018 12:31
Show Gist options
  • Save baffioso/79e3c5d32dd11cf677168ab39b72386a to your computer and use it in GitHub Desktop.
Save baffioso/79e3c5d32dd11cf677168ab39b72386a to your computer and use it in GitHub Desktop.
Opdater lokalplan tabel i postgres med tekst fra plandokument pdf
import requests
import io
import PyPDF2
import psycopg2
from sqlalchemy import create_engine, Table, MetaData, update, select
def get_document(url, timeout=30):
    """Download the PDF at *url* and return the text of all its pages.

    Args:
        url: Address of the PDF document to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text extracted from every page, concatenated in page order.

    Raises:
        requests.RequestException: If the download fails or the server
            answers with an HTTP error status.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Fail on 4xx/5xx so an HTML error page is never parsed as a PDF.
        response.raise_for_status()
    except requests.RequestException as e:
        # Report which URL failed, then let the caller decide what to do;
        # continuing here would only fail later on an unbound response.
        print('Could not download {0}: {1}'.format(url, e))
        raise

    pdf_reader = PyPDF2.PdfFileReader(io.BytesIO(response.content))
    # Extract page by page and join once instead of repeated concatenation.
    return "".join(
        pdf_reader.getPage(page_number).extractText()
        for page_number in range(pdf_reader.numPages)
    )
# Database credentials.
# NOTE(review): credentials are hard-coded; consider reading them from the
# environment or a config file that is kept out of version control.
user = 'gc2'
pw = 'xxx'
port = 5432
host = 'myhost'
db = 'ballerup'

# Create the engine and reflect the table that holds the plan documents.
engine = create_engine('postgresql://{0}:{1}@{2}:{3}/{4}'.format(user, pw, host, port, db))
metadata = MetaData()
plan = Table('lokalplan_dokument', metadata, autoload=True, autoload_with=engine)

# Fill the "document" column with the text extracted from each plan's PDF.
# The connection is used as a context manager so it is always released.
with engine.connect() as connection:
    # Fetch all rows up front so the result set is fully consumed before
    # UPDATE statements are issued on the same connection.
    rows = connection.execute(
        "SELECT id, doklink FROM lokalplan_dokument where kommunenavn = 'Ballerup'"
    ).fetchall()
    for row in rows:
        try:
            document_text = get_document(row['doklink'])
        except Exception as e:
            # One unreachable or unreadable PDF must not abort the whole
            # batch; report it and carry on with the next plan.
            print('Skipping plan {0} ({1}): {2}'.format(row['id'], row['doklink'], e))
            continue
        plan_update = plan.update().values(document=document_text).where(plan.c.id == row['id'])
        connection.execute(plan_update)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment