Last active
June 16, 2020 06:23
-
-
Save rririanto/691e97ce22f7cd43d6a9d54305344587 to your computer and use it in GitHub Desktop.
Python Convert PDF to Image
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Problem:
How do you convert a PDF to an image with a Python script?
Installation:
I use Ubuntu 14.04.
We use Wand, a Python wrapper for ImageMagick [http://www.imagemagick.org/script/index.php], to convert the PDF file.
To install it, run:
$ sudo apt-get install libmagickwand-dev
$ pip install Wand
Now install PIL (via the Pillow package) and NumPy, which the script uses to join the pages:
$ pip install Pillow numpy
More installation details: http://sorry-wand.readthedocs.org/en/latest/guide/install.html
More about Wand: https://pypi.python.org/pypi/Wand
""" | |
from PIL import Image as Img | |
from wand.image import Image | |
import uuid | |
import numpy as np | |
import glob | |
import os | |
import sys | |
def _page_index(path):
    """Return the page number encoded in a temporary page file name.

    For a multi-page document ImageMagick normally writes one file per page
    named ``<name>-0.jpg``, ``<name>-1.jpg``, ...; a file without such a
    numeric suffix is treated as page 0.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    _, dash, suffix = stem.rpartition("-")
    return int(suffix) if dash and suffix.isdigit() else 0


def convert(filepdf):
    """Render a PDF to JPEG pages and join them into one tall JPEG.

    The pages are rendered with Wand at 200 dpi into ``temp/``, resized to
    the smallest page size, stacked vertically and saved in the current
    directory as ``MyPdf<token>.jpg``.

    Args:
        filepdf: Path of the PDF file to convert.

    Returns:
        The path of the combined image on success, ``False`` on any failure
        (the reason is written to stderr).
    """
    # Short pseudo-random token so concurrent/previous runs are not overwritten.
    uuid_set = str(uuid.uuid4().fields[-1])[:5]
    page_pattern = "temp/temp%s*.jpg" % uuid_set
    try:
        # The page images are written below temp/, so make sure it exists.
        if not os.path.isdir("temp"):
            os.makedirs("temp")

        # 200 dpi / quality 80 keeps the text readable at a moderate size.
        with Image(filename=filepdf, resolution=200) as img:
            img.compression_quality = 80
            img.save(filename="temp/temp%s.jpg" % uuid_set)

        # Order pages numerically: a plain string sort would put page 10
        # before page 2.
        page_files = sorted(glob.glob(page_pattern),
                            key=lambda path: (_page_index(path), path))
        if not page_files:
            raise IOError("no page images were produced for %r" % (filepdf,))

        pages = []
        try:
            for path in page_files:
                pages.append(Img.open(path))
            # Smallest page (by width + height) defines the common size.
            min_shape = min((page.size for page in pages),
                            key=lambda size: (sum(size), size))
            # np.vstack needs a real sequence, not a generator.
            # For a horizontal strip use np.hstack instead.
            stacked = np.vstack(
                [np.asarray(page.resize(min_shape)) for page in pages])
        finally:
            # Release the file handles before the temp files are deleted.
            for page in pages:
                page.close()

        pathsave = "MyPdf%s.jpg" % uuid_set
        Img.fromarray(stacked).save(pathsave)
        return pathsave
    except Exception as err:
        # Keep the "False on failure" contract, but do not hide the reason.
        sys.stderr.write("convert(%r) failed: %s\n" % (filepdf, err))
        return False
    finally:
        # Remove the intermediate page images on success and on failure.
        for path in glob.glob(page_pattern):
            try:
                os.remove(path)
            except OSError:
                pass
if __name__ == "__main__":
    # Command-line entry point: convert the PDF named by the first argument.
    if len(sys.argv) < 2:
        # Give a usage hint instead of an IndexError traceback.
        sys.stderr.write("usage: %s <file.pdf>\n" % sys.argv[0])
        sys.exit(2)
    arg = sys.argv[1]
    result = convert(arg)
    # print(...) with a single pre-formatted string works on Python 2 and 3.
    if result:
        print("[*] Succces convert %s and save it to %s" % (arg, result))
    else:
        print("[!] Whoops. something wrong dude. enable err var to track it")
        # Signal the failure to the calling shell.
        sys.exit(1)
""" | |
=========================================== | |
Example run:
$ python testing-pdf.py zz.pdf
[*] Succces convert zz.pdf and save it to MyPdf63245.jpg
=========================================== | |
""" | |
# Well, I hope this will be useful for you and others.
Not sure if anyone else has seen this problem, but I'm getting this error from my OS X command line:
attempting to load the font using its name.
Output may be incorrect.
Fontconfig error: Cannot load default config file
Fontconfig error: Cannot load default config file
**** Error: can't process embedded font stream,
attempting to load the font using its name.
Output may be incorrect.```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@akc185: try the following: