Skip to content

Instantly share code, notes, and snippets.

@gatheluck
Created July 18, 2018 23:51
Show Gist options
  • Save gatheluck/ee87b4e12586a222bafb5409d9ba96cd to your computer and use it in GitHub Desktop.
Save gatheluck/ee87b4e12586a222bafb5409d9ba96cd to your computer and use it in GitHub Desktop.
Automatic file conversion from PDF to JPG
# coding: utf-8
import os
import os.path
import fnmatch
import subprocess
import argparse
# Command-line interface. The single option names the directory whose PDF
# files are converted; the old description/help text described a different
# tool (page tiling) and has been corrected to match what this script does.
parser = argparse.ArgumentParser(
    description='Convert every page of the PDF files found under a directory into JPEG images.')
parser.add_argument(
    "-paper_dir",
    type=str,
    # NOTE(review): machine-specific default kept only for backward
    # compatibility -- pass -paper_dir explicitly on any other machine.
    default='/Users/yosssshi/Documents/projects--code-conference/project--citeseeing/src/18_CiteSeeing/data/CVPR2016/pdf',
    help='Directory that is searched recursively for PDF files')
# Arguments are parsed at module level, so ``args`` is available to the
# ``__main__`` guard at the bottom of the file.
args = parser.parse_args()
def execute(root_path, resolution=500):
    """Render every page of every PDF below ``root_path`` to JPEG files.

    The directory tree is walked recursively. For each file matching
    ``*.pdf`` an output directory is derived by replacing every ``pdf``
    substring in the containing directory path with ``jpg`` (so a ``pdf``
    folder is mirrored into a sibling ``jpg`` folder) and appending the PDF's
    base name without its extension. Ghostscript (``gs``) then writes one
    image per page to ``<output dir>/<base name>_<page>.jpg``.

    Args:
        root_path: Directory searched recursively for PDF files.
        resolution: Value passed to Ghostscript's ``-r`` option (DPI).
            Defaults to 500, the value that used to be hard-coded.

    Returns:
        None. Progress and failures are reported with ``print``; a non-zero
        Ghostscript exit status is reported and the walk continues.
    """
    for dirpath, _, filenames in os.walk(root_path):
        # Only the directory part is rewritten; the file name is handled
        # separately so that names containing "pdf" are not mangled.
        out_root = dirpath.replace("pdf", "jpg")
        for filename in filenames:
            if not fnmatch.fnmatch(filename, u"*.pdf"):
                continue

            org_path = os.path.join(dirpath, filename)
            # splitext removes just the extension, unlike str.replace which
            # would also strip ".pdf" occurring in the middle of the name.
            stem = os.path.splitext(filename)[0]
            out_dir = os.path.join(out_root, stem)
            print("convert {0} to {1}".format(org_path, out_dir + ".jpg"))

            if os.path.isdir(out_dir):
                print("dir exists.")
            else:
                # makedirs (not mkdir): the mirrored "jpg" parent directory
                # usually does not exist yet.
                os.makedirs(out_dir)

            image_path = os.path.join(out_dir, stem)
            print("org_path: ", org_path)
            print("image_path: ", image_path)

            command = [
                "gs",
                "-dBATCH",
                "-dNOPAUSE",
                "-sDEVICE=jpeg",
                "-r{0}".format(resolution),
                # %01d is expanded by Ghostscript to the page counter.
                "-sOutputFile={0}_%01d.jpg".format(image_path),
                org_path,
            ]
            if subprocess.call(command) != 0:
                print("failed: {0}".format(org_path))
# Script entry point: convert everything below the directory selected on the
# command line.
if __name__ == "__main__":
    execute(root_path=args.paper_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment