Skip to content

Instantly share code, notes, and snippets.

@nizarmah
Last active February 18, 2020 16:37
Show Gist options
  • Save nizarmah/7381a2363d508c089c589abf44c45ad9 to your computer and use it in GitHub Desktop.
Save nizarmah/7381a2363d508c089c589abf44c45ad9 to your computer and use it in GitHub Desktop.
Slice-N-Stitch PDFs for OneNote PDF Documents that appear Single Paged
'''
Slice-N-Stitch PDF Fixer Script
Nizar Mahmoud https://nizarmah.me/
Use freely, just would appreciate
keeping this comment on top to
show me off to other people
'''
import sys
import shutil
from os import path
import re
from wand.image import Image
from PyPDF2 import PdfFileMerger
# Page width-to-height ratios, keyed by paper-size name.
# The A4 value equals 595 / 842 (A4 expressed in PostScript points).
wth_ratio = {"A4": 0.70665083135}
class PdfSlicer(object):
    """Cut one tall page image into consecutive A4-proportioned slices.

    Each slice is as wide as the source image and ``width / A4 ratio`` tall.
    Successive slices overlap by ``margin_err`` pixels, so the window moves
    down by ``slice_height - margin_err`` on every step.

    Typical use::

        slicer = PdfSlicer(name, image, margin)
        while slicer.can_slice_more():
            slice_path = slicer.get_next_slice()
    """

    def __init__(self, pdf_filename, pdf_image, margin_err):
        """Prepare a slicer for ``pdf_image``.

        Args:
            pdf_filename: Base name used when naming the slice files.
            pdf_image: Image object exposing ``width``, ``height`` and
                ``clone()`` (a ``wand.image.Image`` in this script).
            margin_err: Overlap in pixels between consecutive slices.

        Raises:
            ValueError: If ``margin_err`` is not smaller than the slice
                height; the crop window would then never move down and
                slicing could not terminate.
        """
        self.pdf_image = pdf_image
        self.pdf_filename = pdf_filename
        self.margin_err = margin_err

        self.slice_width = int(pdf_image.width)
        self.slice_height = int(self.slice_width / wth_ratio["A4"])

        if self.slice_height - margin_err <= 0:
            raise ValueError(
                "margin_err (%r) must be smaller than the slice height (%r)"
                % (margin_err, self.slice_height))

        # -1 means "no slice produced yet"; get_next_slice() increments first.
        self.slice_num = -1

    def _slice_top(self, index):
        """Return the top pixel offset of slice ``index``, never below 0."""
        return max(index * (self.slice_height - self.margin_err), 0)

    def get_slice_name(self):
        """Return the file name used for the current slice."""
        return (".slice-" + self.pdf_filename + "-"
                + str(self.slice_num) + ".pdf")

    def get_next_slice(self):
        """Crop the next slice, save it to disk and return its file name."""
        self.slice_num += 1

        with self.pdf_image.clone() as page_slice:
            page_slice.crop(top=self._slice_top(self.slice_num),
                            width=self.slice_width,
                            height=self.slice_height)

            slice_name = self.get_slice_name()
            page_slice.save(filename=slice_name)

        return slice_name

    def can_slice_more(self):
        """Return True while part of the image is not yet covered by a slice."""
        if self.slice_num < 0:
            # Nothing sliced yet: any image with content needs a first slice,
            # even one that is shorter than margin_err.
            return self.pdf_image.height > 0

        # Bottom edge of the most recently produced slice.
        covered = ((self.slice_num + 1) * self.slice_height
                   - self.slice_num * self.margin_err)
        return covered < self.pdf_image.height
def sns_pdf(input_file, margin_err=150):
    """Slice a single tall PDF page into A4-shaped pages and stitch them.

    The input is loaded as an image, cut into slices by ``PdfSlicer`` and the
    slices are merged, in order, into ``<input stem>_stitched.pdf`` in the
    current working directory. The intermediate ``.slice-*.pdf`` files
    written by the slicer are left on disk.

    Args:
        input_file: Path of the PDF to fix.
        margin_err: Overlap in pixels between consecutive slices.
    """
    pdf_filename = path.splitext(path.basename(input_file))[0]

    stitcher = PdfFileMerger()
    # Slice files stay open until the merged PDF is written, since the merger
    # may still read from them; all of them are closed in ``finally``.
    slice_files = []

    try:
        # Load the PDF as an image and crop it according to the A4 ratio.
        with Image(filename=input_file) as pdf_image:
            slicer = PdfSlicer(pdf_filename, pdf_image, margin_err)

            while slicer.can_slice_more():
                slice_file = open(slicer.get_next_slice(), "rb")
                slice_files.append(slice_file)
                stitcher.append(slice_file)

        stitcher.write(pdf_filename + "_stitched.pdf")
    finally:
        stitcher.close()
        for slice_file in slice_files:
            slice_file.close()
def main(argv):
    """Run the slicer from command-line arguments.

    Args:
        argv: Argument list shaped like ``sys.argv``: script name, input
            PDF path and, optionally, the slice overlap in pixels.

    Returns:
        0 on success, 1 when the arguments are missing or invalid. Errors
        raised while processing the PDF are not caught, so the real cause
        stays visible.
    """
    usage = ('Usage : python ' + path.basename(__file__)
             + ' file.pdf [margin_err]')

    if len(argv) < 2:
        print('Error : Missing Input File Path')
        print(usage)
        return 1

    # The margin is only honoured when exactly one extra argument is given.
    if len(argv) == 3:
        try:
            margin_err = int(argv[2])
        except ValueError:
            print('Error : Margin must be an integer, got ' + repr(argv[2]))
            print(usage)
            return 1
        sns_pdf(argv[1], margin_err)
    else:
        sns_pdf(argv[1])

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment