Skip to content

Instantly share code, notes, and snippets.

@ksindi
Created August 28, 2017 15:02
Show Gist options
  • Save ksindi/18a1bfcaf3771a25fecc3fc332356ac2 to your computer and use it in GitHub Desktop.
Save ksindi/18a1bfcaf3771a25fecc3fc332356ac2 to your computer and use it in GitHub Desktop.
"""
Convert the first page of a PDF to an image with Wand and PyPDF2.

Dockerfile:

    FROM python:3.6
    RUN apt-get update && apt-get install -y ghostscript libmagickwand-dev
    RUN pip install wand PyPDF2
"""
import io
from pathlib import Path
import PyPDF2
from wand.image import Image
# Image format name handed to Wand's ``Image.convert`` in ``image_etl``.
IMAGE_TYPE = 'jpeg'
# Output directory name. NOTE(review): not referenced by any other line of the
# original snippet -- presumably meant to be the parent of the saved image.
save_dir = 'my-save-dir'
# In-memory binary stream passed to ``image_etl`` as the source PDF.
# NOTE(review): ``raw`` is not defined anywhere in this snippet; presumably it
# is the PDF content as ``bytes`` and must be supplied before this line runs.
pdf_bytes = io.BytesIO(raw)
def image_etl(pdf_bytes, save_path, resolution=200):
    """Render the first page of a PDF to an image file.

    The first page is copied into a new single-page PDF held in memory,
    rasterized with Wand, converted to ``IMAGE_TYPE`` and written to
    ``save_path``.

    Args:
        pdf_bytes: Binary file-like object handed to ``PyPDF2.PdfFileReader``
            as the source PDF.
        save_path: Destination of the image; passed to Wand as
            ``str(save_path)``, so a ``str`` or ``pathlib.Path`` both work.
        resolution: Resolution passed to ``wand.image.Image`` when the page
            is rasterized.

    Returns:
        The ``wand.image.Image`` of the rasterized page (the source image,
        not the converted copy). The caller is responsible for closing it.

    Raises:
        Whatever PyPDF2 or Wand raise while reading, converting or saving;
        the rasterized image is closed before the exception propagates.
    """
    pdf = PyPDF2.PdfFileReader(pdf_bytes)
    writer = PyPDF2.PdfFileWriter()
    writer.addPage(pdf.getPage(0))

    # Serialize only the first page so Wand does not rasterize the whole
    # document. A separate name keeps the caller's stream distinct from it.
    first_page = io.BytesIO()
    writer.write(first_page)
    first_page.seek(0)

    img = Image(file=first_page, resolution=resolution)
    try:
        # ``convert`` returns a new image; convert exactly once and let the
        # context manager release the converted copy after saving.
        with img.convert(IMAGE_TYPE) as converted:
            converted.save(filename=str(save_path))
    except Exception:
        # Do not leak the rasterized image when conversion or saving fails.
        img.close()
        raise
    return img
# Destination file inside ``save_dir``; the extension follows ``IMAGE_TYPE``.
# NOTE(review): the original snippet never defined ``image_path``; the file
# name used here is a placeholder -- adjust to the naming scheme you need.
image_path = Path(save_dir) / f'page-1.{IMAGE_TYPE}'
try:
    image_etl(pdf_bytes, image_path)
except Exception as e:
    # Top-level boundary of the script: report the failure instead of crashing.
    print("Failed converting pdf to image", e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment