ivanistheone · October 2, 2022 01:31
diff --git a/mkbirdseye.py b/mkbirdseye.py
 #!/usr/bin/env python
 import argparse
 import os
 try:
    from pdf2image import convert_from_path
    from PIL import Image
 except ImportError:
    print("You need to run `pip install Pillow pdf2image` before this script")
    import sys
    sys.exit(-1)


 def mkbirdseye(pdfpath, pngpath, basewidth=400, firstpage=1, lastpage=None, croptop=1.5, cropbottom=1.3, maxpages=100):
    """
    Convert PDF at `pdfpath` into a bird's eye overview PNG saved to `pngpath`.

    Pages are rendered at 600 dpi, cropped to drop the header/footer strips,
    scaled down to `basewidth` pixels wide and stacked vertically on a white
    canvas that is three page-widths wide (the extra width is room for notes).

    Arguments:
      - `pdfpath`: path of the input PDF file.
      - `pngpath`: path of the output image. When the pages are split into
        several bundles, `_partNN` is inserted before the file extension.
      - `basewidth`: width in pixels of each scaled-down page.
      - `firstpage`: first page to include (passed to pdf2image `first_page`).
      - `lastpage`: last page to include; a falsy value means "to the end".
      - `croptop`, `cropbottom`: strip heights in centimetres (converted with
        2.54 cm/inch at the rendering dpi) removed from the top and bottom of
        the pages. The first page of a bundle keeps its top and the last page
        of a bundle keeps its bottom.
      - `maxpages`: maximum number of pages stacked into one output image.
    """
    # 0. load the PDF's pages as PIL image objects
    RESOLUTION = 600  # rendering resolution in dpi
    # Let pdf2image stop at `lastpage` instead of rendering the rest of the
    # document at 600 dpi only to throw those pages away afterwards.
    all_pages = convert_from_path(
        pdfpath,
        dpi=RESOLUTION,
        first_page=firstpage,
        last_page=lastpage if lastpage else None,
    )

    # split `all_pages` into bundles of pages containing at most `maxpages` each
    bundles = [all_pages[i:i + maxpages] for i in range(0, len(all_pages), maxpages)]

    # Split the extension off once; str.replace(".png", ...) would touch every
    # ".png" in the path and do nothing at all for e.g. an ".PNG" suffix.
    root, ext = os.path.splitext(pngpath)

    # crop heights in pixels (cm -> inches -> pixels); same for every bundle
    crop_top = croptop/2.54*RESOLUTION
    crop_bottom = cropbottom/2.54*RESOLUTION

    for bi, pages in enumerate(bundles):
        if len(bundles) > 1:
            print("Processing bundle", bi, "...")
            thepngpath = root + "_part" + str(bi+1).zfill(2) + ext
        else:
            thepngpath = pngpath

        # 1. crop pages to remove headers and footer that repeats on each page
        cropped_pages = []
        last_index = len(pages) - 1
        for i, page in enumerate(pages):
            width, height = page.size
            # The first page keeps its top and the last page keeps its bottom;
            # a single-page bundle is both, so it is left uncropped.
            top = 0 if i == 0 else crop_top
            bottom = height if i == last_index else height - crop_bottom
            cropped_pages.append(page.crop((0, top, width, bottom)))

        # 2. resize pages to `basewidth` pixels wide, keeping the aspect ratio
        images = []
        for img in cropped_pages:
            wpercent = basewidth/float(img.size[0])
            hsize = int(float(img.size[1])*wpercent)
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
            images.append(img.resize((basewidth, hsize), Image.LANCZOS))

        # 3. Combine images to form new_im
        widths, heights = zip(*(im.size for im in images))
        total_height = sum(heights)
        max_width = max(widths)*3  # extra page widths for notes
        new_im = Image.new('RGB', (max_width, total_height), color=(255, 255, 255))
        x_offset = int(0.136*max_width)  # left margin: 13.6% of the canvas width
        y_offset = 0
        for im in images:
            new_im.paste(im, (x_offset, y_offset))
            y_offset += im.size[1]

        # save the stacked overview image
        new_im.save(thepngpath)
        print("Saved file to", thepngpath)



 def main():
    """
    Command line entry point: parse the arguments and call `mkbirdseye`.

    The output path is `--name` (with `.png` appended when it is missing) or,
    when `--name` is not given, the input path with its extension replaced by
    `.png`. Exits through `arg_parser.error` when the input file is missing.
    """
    arg_parser = argparse.ArgumentParser(
        description="Generate a bird's eye overview PNG from a PDF document",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    # `type=` makes argparse do the conversions, so a bad value produces a
    # usage error instead of a traceback.
    arg_parser.add_argument('pdfpath',      help='The input PDF file path')
    arg_parser.add_argument('--name',       help='Output filename', required=False)
    arg_parser.add_argument('--basewidth',  help='Resolution (png will be 3*basewidth)', type=int, default=400)
    arg_parser.add_argument('--croptop',    help='Crop top from pages', type=float, default=1.5)
    arg_parser.add_argument('--cropbottom', help='Crop bottom from pages', type=float, default=1.3)
    arg_parser.add_argument('--firstpage',  help='Start at page (default FIRST)', type=int, default=1)
    arg_parser.add_argument('--lastpage',   help='End page (default to LAST)', type=int, required=False)
    arg_parser.add_argument('--maxpages',   help='Maximum number of pages per image', type=int, default=100)
    args = arg_parser.parse_args()
    print(args)

    # work out the input and output paths
    if not os.path.exists(args.pdfpath):
        arg_parser.error("The file %s does not exist!" % args.pdfpath)
    if args.name:
        if args.name.endswith('.png'):
            pngpath = args.name
        else:
            pngpath = args.name + '.png'
    else:
        # Swap the extension with splitext: str.replace('.pdf', '.png') leaves
        # the path unchanged for 'doc.PDF' or an extension-less name, which
        # would make the output path equal to the input PDF.
        pngpath = os.path.splitext(args.pdfpath)[0] + '.png'

    # call the actual function
    mkbirdseye(args.pdfpath, pngpath,
               basewidth=args.basewidth,
               firstpage=args.firstpage,
               lastpage=args.lastpage or None,
               croptop=args.croptop,
               cropbottom=args.cropbottom,
               maxpages=args.maxpages)



 if __name__ == "__main__":
    # Entry point used when running the script on the command line. Usage:
    #     ./mkbirdseye.py some.pdf
    main()
	#!/usr/bin/env python
	import argparse
	import os
	try:
	from pdf2image import convert_from_path
	from PIL import Image
	except ImportError:
	print("You need to run `pip install Pillow pdf2image` before this script")
	import sys
	sys.exit(-1)


	def mkbirdseye(pdfpath, pngpath, basewidth=400, firstpage=1, lastpage=None, croptop=1.5, cropbottom=1.3, maxpages=100):
	    """
	    Convert PDF at `pdfpath` into a bird's eye overview PNG saved to `pngpath`.

	    Pages are rendered at 600 dpi, cropped to drop the header/footer strips,
	    scaled down to `basewidth` pixels wide and stacked vertically on a white
	    canvas that is three page-widths wide (the extra width is room for notes).

	    Arguments:
	      - `pdfpath`: path of the input PDF file.
	      - `pngpath`: path of the output image. When the pages are split into
	        several bundles, `_partNN` is inserted before the file extension.
	      - `basewidth`: width in pixels of each scaled-down page.
	      - `firstpage`: first page to include (passed to pdf2image `first_page`).
	      - `lastpage`: last page to include; a falsy value means "to the end".
	      - `croptop`, `cropbottom`: strip heights in centimetres (converted with
	        2.54 cm/inch at the rendering dpi) removed from the top and bottom of
	        the pages. The first page of a bundle keeps its top and the last page
	        of a bundle keeps its bottom.
	      - `maxpages`: maximum number of pages stacked into one output image.
	    """
	    # 0. load the PDF's pages as PIL image objects
	    RESOLUTION = 600  # rendering resolution in dpi
	    # Let pdf2image stop at `lastpage` instead of rendering the rest of the
	    # document at 600 dpi only to throw those pages away afterwards.
	    all_pages = convert_from_path(
	        pdfpath,
	        dpi=RESOLUTION,
	        first_page=firstpage,
	        last_page=lastpage if lastpage else None,
	    )

	    # split `all_pages` into bundles of pages containing at most `maxpages` each
	    bundles = [all_pages[i:i + maxpages] for i in range(0, len(all_pages), maxpages)]

	    # Split the extension off once; str.replace(".png", ...) would touch every
	    # ".png" in the path and do nothing at all for e.g. an ".PNG" suffix.
	    root, ext = os.path.splitext(pngpath)

	    # crop heights in pixels (cm -> inches -> pixels); same for every bundle
	    crop_top = croptop/2.54*RESOLUTION
	    crop_bottom = cropbottom/2.54*RESOLUTION

	    for bi, pages in enumerate(bundles):
	        if len(bundles) > 1:
	            print("Processing bundle", bi, "...")
	            thepngpath = root + "_part" + str(bi+1).zfill(2) + ext
	        else:
	            thepngpath = pngpath

	        # 1. crop pages to remove headers and footer that repeats on each page
	        cropped_pages = []
	        last_index = len(pages) - 1
	        for i, page in enumerate(pages):
	            width, height = page.size
	            # The first page keeps its top and the last page keeps its bottom;
	            # a single-page bundle is both, so it is left uncropped.
	            top = 0 if i == 0 else crop_top
	            bottom = height if i == last_index else height - crop_bottom
	            cropped_pages.append(page.crop((0, top, width, bottom)))

	        # 2. resize pages to `basewidth` pixels wide, keeping the aspect ratio
	        images = []
	        for img in cropped_pages:
	            wpercent = basewidth/float(img.size[0])
	            hsize = int(float(img.size[1])*wpercent)
	            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
	            images.append(img.resize((basewidth, hsize), Image.LANCZOS))

	        # 3. Combine images to form new_im
	        widths, heights = zip(*(im.size for im in images))
	        total_height = sum(heights)
	        max_width = max(widths)*3  # extra page widths for notes
	        new_im = Image.new('RGB', (max_width, total_height), color=(255, 255, 255))
	        x_offset = int(0.136*max_width)  # left margin: 13.6% of the canvas width
	        y_offset = 0
	        for im in images:
	            new_im.paste(im, (x_offset, y_offset))
	            y_offset += im.size[1]

	        # save the stacked overview image
	        new_im.save(thepngpath)
	        print("Saved file to", thepngpath)



	def main():
	    """
	    Command line entry point: parse the arguments and call `mkbirdseye`.

	    The output path is `--name` (with `.png` appended when it is missing) or,
	    when `--name` is not given, the input path with its extension replaced by
	    `.png`. Exits through `arg_parser.error` when the input file is missing.
	    """
	    arg_parser = argparse.ArgumentParser(
	        description="Generate a bird's eye overview PNG from a PDF document",
	        formatter_class=argparse.ArgumentDefaultsHelpFormatter
	    )
	    # `type=` makes argparse do the conversions, so a bad value produces a
	    # usage error instead of a traceback.
	    arg_parser.add_argument('pdfpath', help='The input PDF file path')
	    arg_parser.add_argument('--name', help='Output filename', required=False)
	    arg_parser.add_argument('--basewidth', help='Resolution (png will be 3*basewidth)', type=int, default=400)
	    arg_parser.add_argument('--croptop', help='Crop top from pages', type=float, default=1.5)
	    arg_parser.add_argument('--cropbottom', help='Crop bottom from pages', type=float, default=1.3)
	    arg_parser.add_argument('--firstpage', help='Start at page (default FIRST)', type=int, default=1)
	    arg_parser.add_argument('--lastpage', help='End page (default to LAST)', type=int, required=False)
	    arg_parser.add_argument('--maxpages', help='Maximum number of pages per image', type=int, default=100)
	    args = arg_parser.parse_args()
	    print(args)

	    # work out the input and output paths
	    if not os.path.exists(args.pdfpath):
	        arg_parser.error("The file %s does not exist!" % args.pdfpath)
	    if args.name:
	        if args.name.endswith('.png'):
	            pngpath = args.name
	        else:
	            pngpath = args.name + '.png'
	    else:
	        # Swap the extension with splitext: str.replace('.pdf', '.png') leaves
	        # the path unchanged for 'doc.PDF' or an extension-less name, which
	        # would make the output path equal to the input PDF.
	        pngpath = os.path.splitext(args.pdfpath)[0] + '.png'

	    # call the actual function
	    mkbirdseye(args.pdfpath, pngpath,
	               basewidth=args.basewidth,
	               firstpage=args.firstpage,
	               lastpage=args.lastpage or None,
	               croptop=args.croptop,
	               cropbottom=args.cropbottom,
	               maxpages=args.maxpages)



	if __name__ == '__main__':
	    # Entry point used when running the script on the command line. Usage:
	    #     ./mkbirdseye.py some.pdf
	    main()