Last active
February 18, 2020 16:37
-
-
Save nizarmah/7381a2363d508c089c589abf44c45ad9 to your computer and use it in GitHub Desktop.
Slice-N-Stitch PDFs for OneNote PDF Documents that appear Single Paged
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Slice-N-Stitch PDF Fixer Script | |
Nizar Mahmoud https://nizarmah.me/ | |
Use freely, just would appreciate | |
keeping this comment on top to | |
show me off to other people | |
''' | |
import sys | |
import shutil | |
from os import path | |
import re | |
from wand.image import Image | |
from PyPDF2 import PdfFileMerger | |
# Page width divided by page height, keyed by paper-size name.
# PdfSlicer divides a slice's width by this value to get its height.
wth_ratio = {"A4": 0.70665083135}
class PdfSlicer(object):
    """Cut one tall page image into consecutive A4-proportioned slices.

    Each slice is as wide as the source image and ``width / wth_ratio["A4"]``
    tall. Consecutive slices overlap by ``margin_err`` pixels, so slice ``n``
    starts at ``n * (slice_height - margin_err)``.

    Typical use::

        while slicer.can_slice_more():
            name = slicer.get_next_slice()
    """

    def __init__(self, pdf_filename, pdf_image, margin_err):
        """Prepare a slicer for ``pdf_image``.

        Args:
            pdf_filename: Base name used to build the slice file names.
            pdf_image: Image object exposing ``width``, ``height`` and
                ``clone()`` (a ``wand.image.Image`` in this script).
            margin_err: Overlap, in pixels, between consecutive slices.

        Raises:
            ValueError: If ``margin_err`` is not smaller than the slice
                height; every slice would then start at row 0 and slicing
                would never reach the end of the image.
        """
        self.pdf_image = pdf_image
        self.pdf_filename = pdf_filename
        self.margin_err = margin_err
        self.slice_width = int(pdf_image.width)
        self.slice_height = int(self.slice_width / wth_ratio["A4"])
        if margin_err >= self.slice_height:
            raise ValueError(
                "margin_err (%d) must be smaller than the slice height (%d)"
                % (margin_err, self.slice_height))
        # -1 means "no slice produced yet"; get_next_slice() pre-increments.
        self.slice_num = -1

    def get_slice_name(self):
        """Return the file name used for the current slice."""
        return (".slice-" + self.pdf_filename + "-"
                + str(self.slice_num) + ".pdf")

    def get_next_slice(self):
        """Crop the next slice, save it to disk and return its file name."""
        self.slice_num += 1
        # Work on a clone so the source image stays intact for later slices.
        with self.pdf_image.clone() as page:
            top = self.slice_num * (self.slice_height - self.margin_err)
            page.crop(top=max(top, 0),
                      width=self.slice_width, height=self.slice_height)
            slice_name = self.get_slice_name()
            page.save(filename=slice_name)
        return slice_name

    def can_slice_more(self):
        """Return True while part of the image is not yet covered by a slice."""
        if self.slice_num < 0:
            # Nothing sliced yet: any non-empty image needs at least one
            # slice, even when it is shorter than margin_err.
            return self.pdf_image.height > 0
        # Bottom edge of the most recently produced slice.
        covered = ((self.slice_num + 1) * self.slice_height
                   - self.slice_num * self.margin_err)
        return covered < self.pdf_image.height
def sns_pdf(input_file, margin_err=150):
    """Slice a single-page PDF into A4-proportioned pages and stitch them.

    The input is loaded as one image, cut into slices by ``PdfSlicer`` (each
    slice is saved as a ``.slice-<name>-<n>.pdf`` file in the current
    directory), and the slices are merged into ``<name>_stitched.pdf``, where
    ``<name>`` is the input file's base name without its extension.

    Args:
        input_file: Path to the PDF to fix.
        margin_err: Overlap, in pixels, between consecutive slices.
    """
    pdf_filename = path.splitext(path.basename(input_file))[0]

    stitcher = PdfFileMerger()
    # Handles given to the merger are kept open until the output is written
    # and are then closed explicitly instead of being leaked.
    slice_files = []
    try:
        with Image(filename=input_file) as pdf_image:
            slicer = PdfSlicer(pdf_filename, pdf_image, margin_err)
            while slicer.can_slice_more():
                slice_file = open(slicer.get_next_slice(), "rb")
                # Track the handle first so it is closed even if append fails.
                slice_files.append(slice_file)
                stitcher.append(slice_file)

        stitcher.write(pdf_filename + "_stitched.pdf")
    finally:
        stitcher.close()
        for slice_file in slice_files:
            slice_file.close()
if __name__ == '__main__':
    # Usage: python <script> file.pdf [margin_err]
    # Only a missing path is reported as a usage error. Failures inside
    # sns_pdf propagate with their own traceback instead of being relabelled
    # as a missing argument.
    if len(sys.argv) < 2:
        print('Error : Missing Input File Path')
        print('Usage : python ', path.basename(__file__), ' file.pdf')
        sys.exit(1)

    if len(sys.argv) == 3:
        try:
            margin_err = int(sys.argv[2])
        except ValueError:
            print('Error : Margin must be an integer, got', repr(sys.argv[2]))
            sys.exit(1)
        sns_pdf(sys.argv[1], margin_err)
    else:
        sns_pdf(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment