Last active
May 31, 2019 17:08
-
-
Save fractaledmind/7226785 to your computer and use it in GitHub Desktop.
This little program uses the Mac app Skim to split scanned PDFs that have two pages laid out on a single PDF page in landscape mode.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* SPLIT TWO-PAGE PDFS | |
--Stephen Margheim | |
-- 11/2/13 | |
-- open source | |
VERSION 3.0 | |
--Version 3 adds a feature for OCR'd PDFs. If your PDF has been OCR'd, the script now automatically crops the individual pages so that the text field is centered. | |
This little program uses the Mac app Skim to split scanned PDFs that have two pages laid out on a single PDF page in landscape mode: | |
---------- | |
| | | | |
| | | | |
---------- | |
USAGE NOTES: | |
If your PDF is oriented such that you first need to ROTATE LEFT or ROTATE RIGHT, you will need to save the PDF in the proper, landscape mode (see above) and then run the script. This is required because Skim's border detection is fixed for the opened PDF. So, if the PDF is in portrait mode when you open it, the borders will remain those from the portrait mode even if you rotate the PDF. With a rotated PDF, the script will generate a test page split, and you will have to tell it whether that split is the Left-Hand or the Right-Hand page. The script will then run automatically. | |
Also, the script creates a sub-folder within your Documents folder where it saves all of the individual PDF pages before combining them. This sub-folder, entitled "Split PDFs", is also where the final PDF will be saved. Note that all of the individual PDFs are deleted once the script finishes running, so the only file left in "Split PDFs" is the final, single-page PDF. | |
Finally, I have yet to find an elegant way for the script to run without each individual PDF page popping up for a moment as it is being saved. This is at the top of my list for Version 2. Let me know if you have any suggestions. For now, you will have to deal with the Skim documents opening and closing. | |
Time: Approximately 1 second per original double page | |
*) | |
--script-wide flags: moveOn records whether the PDF is in landscape orientation, OCR whether any page carries text
property moveOn : true
property OCR : false

--make sure ~/Documents/Split PDFs exists and remember it as theLocation
tell application "Finder"
	set homePath to path to home folder as string
	set targetFolder to (homePath & "Documents:") as alias
	set splitPath to homePath & "Documents:Split PDFs:"
	try
		--coercing to alias fails when the folder does not exist yet
		set theLocation to splitPath as alias
	on error
		make new folder at targetFolder with properties {name:"Split PDFs"}
		set theLocation to splitPath as alias
	end try
end tell
--split the PDF using Skim
tell application "Skim"
	
	(* ///
	PART ONE: Get Key Metadata
	/// *)
	
	--script properties can keep their value from a previous run, so start from a known state
	set moveOn to true
	set OCR to false
	
	--get the PDF title, with and without its ".pdf" extension
	set theTitle to name of front document
	set shortTitle to my stripPDFExtension(theTitle)
	
	--get the number of pages
	set num to count of index of every page of document 1
	
	--see if any pages have OCR'd text
	set t to line bounds of every page of document 1
	if not (count of t) = num then error "Skim did not return line bounds for every page of the document."
	set OCRpages to {}
	repeat with i from 1 to count of t
		if not item i of t = {} then
			set OCR to true
			copy i to end of OCRpages
		end if
	end repeat
	
	--ensure the PDF is properly oriented: every page must be wider than it is tall,
	--otherwise the whole document is handled as a rotated PDF (PART THREE)
	repeat with i from 1 to num
		set {xleft, ytop, xright, ybottom} to get bounds for page i of document theTitle
		if not (((xright - xleft) / (ytop - ybottom)) > 1) then
			set moveOn to false
			exit repeat
		end if
	end repeat
	
	if moveOn = true then
		
		(* ///
		PART TWO: Handle the Left-Hand and Right-Hand Pages of Landscape PDFs
		/// *)
		
		repeat with i from 1 to num
			--get the rectangular bounds for the full, double page PDF
			set {xleft, ytop, xright, ybottom} to get bounds for page i of document theTitle
			--the vertical line that separates the two pages
			set xmid to xleft + ((xright - xleft) / 2 as integer)
			set hasText to (OCRpages contains i)
			
			--left-hand page
			my cropHalf(i, theTitle, xleft, ytop, xmid, ybottom, hasText)
			my saveSplitPage(shortTitle & "_left_" & i, theLocation)
			
			--right-hand page
			my cropHalf(i, theTitle, xmid, ytop, xright, ybottom, hasText)
			my saveSplitPage(shortTitle & "_right_" & i, theLocation)
		end repeat
		
	else
		
		(* ///
		PART THREE: Crop Rotated PDFs
		/// *)
		
		--crop a trial page (the half between the vertical midpoint and ybottom) to determine PDF rotation
		set {xleft, ytop, xright, ybottom} to get bounds for page 1 of document theTitle
		set ymid to ybottom + ((ytop - ybottom) / 2)
		my splits(1, theTitle, xleft, ymid, xright, ybottom)
		set pageQu to display dialog "Is this the Left-Hand page or the Right-Hand page?" with title "Skim Page Splitter" buttons {"Left", "Right"}
		set pageSide to button returned of pageQu
		--the trial page has served its purpose
		close front document without saving
		
		--the user's answer decides which half is saved as the left page and which as the right page
		if pageSide = "Left" then
			set {trialHalfTag, otherHalfTag} to {"_left_", "_right_"}
		else
			set {trialHalfTag, otherHalfTag} to {"_right_", "_left_"}
		end if
		
		--crop and save both halves of every page individually
		repeat with i from 1 to num
			set {xleft, ytop, xright, ybottom} to get bounds for page i of document theTitle
			set ymid to ybottom + ((ytop - ybottom) / 2)
			
			--same half as the trial page
			my splits(i, theTitle, xleft, ymid, xright, ybottom)
			my saveSplitPage(shortTitle & trialHalfTag & i, theLocation)
			
			--the other half
			my splits(i, theTitle, xleft, ytop, xright, ymid)
			my saveSplitPage(shortTitle & otherHalfTag & i, theLocation)
		end repeat
		
	end if
	
	--take the individual PDFs and combine into a new, single PDF
	--(called only once the page files for the detected orientation have been written)
	my combinePDFPages(num, shortTitle)
	
end tell

(* MAIN-SCRIPT HELPERS *)

--Returns theName without a trailing ".pdf"; any other name is returned unchanged.
on stripPDFExtension(theName)
	if (theName ends with ".pdf") and ((length of theName) > 4) then
		return text 1 thru -5 of theName
	end if
	return theName
end stripPDFExtension

--Saves Skim's front document as a PDF called newTitle inside theLocation, then closes it.
--The short delays give the freshly created document time to appear and to be written.
on saveSplitPage(newTitle, theLocation)
	tell application "Skim"
		delay 0.2
		save front document as "PDF" in ((theLocation as string) & newTitle)
		delay 0.2
		close document newTitle
	end tell
end saveSplitPage

--Crops the region {halfLeft, ytop, halfRight, ybottom} of page i of document theTitle into a new front document.
--When hasText is true and the cropped half contains text lines, the crop is redone so that it
--spans the text field plus a 20-unit margin on the left and on the right.
on cropHalf(i, theTitle, halfLeft, ytop, halfRight, ybottom, hasText)
	my splits(i, theTitle, halfLeft, ytop, halfRight, ybottom)
	if not hasText then return
	
	tell application "Skim"
		--get rectangular bounds of the half page and of its text lines
		set {hleft, htop, hright, hbottom} to bounds of page 1 of front document
		set textBounds_full to line bounds of page 1 of front document
	end tell
	
	--this half has no text (e.g. a blank page): keep the plain crop
	if textBounds_full is {} then return
	
	set {tleft, ttop, tright, tbottom} to my get_textBounds(textBounds_full)
	
	--delete uncentered cropping attempt
	tell application "Skim" to close front document without saving
	
	--prepare centered dimensions: translate the text edges from the cropped half back to the double page
	set nleft to ((halfLeft + (tleft - hleft)) - 20) as integer
	set nright to ((halfRight - (hright - tright)) + 20) as integer
	
	--recrop the double page
	my splits(i, theTitle, nleft, ytop, nright, ybottom)
end cropHalf
(* HANDLERS *) | |
--Copies the rectangle {l, t, r, b} of page i of the Skim document named theTitle to the clipboard,
--then clicks the first item of the third menu in Skim's menu bar via GUI scripting
--(presumably File > New From Clipboard -- confirm against the installed Skim version),
--so that the cropped region ends up as a new front document.
on splits(i, theTitle, l, t, r, b)
	tell application "Skim"
		set the clipboard to (grab page i of document theTitle for {l, t, r, b})
	end tell
	tell application "System Events"
		tell process "Skim"
			click menu item 1 of menu 1 of menu bar item 3 of menu bar 1
		end tell
	end tell
end splits
--Returns the rectangle {left, top, right, bottom} that encloses every line rectangle in t.
--t: a non-empty list of {left, top, right, bottom} rectangles (as returned by Skim's "line bounds").
--The result uses the smallest left, the largest top, the largest right and the smallest bottom.
--Only built-in AppleScript is used, so no scripting addition providing "sortlist" is required.
--Raises an error when t is empty.
on get_textBounds(t)
	if (count of t) = 0 then error "Can't get_textBounds: no line bounds were supplied."
	
	--start from the first rectangle and widen it with every further one
	set firstRect to item 1 of t
	set {nleft, ntop, nright, nbottom} to {item 1 of firstRect, item 2 of firstRect, item 3 of firstRect, item 4 of firstRect}
	repeat with i from 2 to count of t
		set x to item i of t
		set {xleft, ytop, xright, ybottom} to {item 1 of x, item 2 of x, item 3 of x, item 4 of x}
		if xleft < nleft then set nleft to xleft
		if ytop > ntop then set ntop to ytop
		if xright > nright then set nright to xright
		if ybottom < nbottom then set nbottom to ybottom
	end repeat
	
	return {nleft, ntop, nright, nbottom}
end get_textBounds
--Combines the individual page PDFs written by the main script into one PDF and opens it in Skim.
--num: number of double pages that were split (one "_left_" and one "_right_" file is expected per page).
--shortTitle: title of the original PDF without extension; it prefixes every file name.
--The files are read from, and "<shortTitle>_cropped.pdf" is written to, the "Split PDFs" folder
--inside the current user's Documents folder. The individual page files are deleted afterwards.
--Raises an error when one of the expected page files does not exist (the alias coercion fails).
on combinePDFPages(num, shortTitle)
	--resolve the folder for whoever runs the script instead of a fixed disk and user name
	set splitFolder to (path to home folder as string) & "Documents:Split PDFs:"
	
	--save all left hand page PDF files to list
	set l to {}
	repeat with i from 1 to num
		set end of l to ((splitFolder & shortTitle & "_left_" & i & ".pdf") as alias)
	end repeat
	
	--save all right hand page PDF files to list
	repeat with i from 1 to num
		set end of l to ((splitFolder & shortTitle & "_right_" & i & ".pdf") as alias)
	end repeat
	
	--reorder list to go: left1, right1, left2, right2, etc.
	--("div" keeps the group length an integer; "/" would produce a real)
	set n to ((count of l) div 2)
	set nl to my groupList(l, n)
	set thePDFs to my interlaceLists(item 1 of nl, item 2 of nl)
	
	--prepare final PDF file
	set outputFile to splitFolder & shortTitle & "_cropped.pdf"
	
	tell application "Skim"
		--check if user wishes to keep or discard first left-hand page (which is sometimes the last page of another text)
		set leftCheck to display dialog "Include the first left hand page in final PDF?" with title "Skim Page Splitter" buttons {"Keep", "Discard"}
	end tell
	
	--the first entry of thePDFs is the first left-hand page; skip it when it is discarded
	if button returned of leftCheck = "Keep" then
		set firstIndex to 1
	else
		set firstIndex to 2
	end if
	
	--combine the selected individual PDF pages into new, single PDF
	set pdfArgs to ""
	repeat with i from firstIndex to count of thePDFs
		set p to item i of thePDFs
		set pdfArgs to pdfArgs & " " & quoted form of POSIX path of p
	end repeat
	--NOTE(review): this relies on the join.py script bundled with Automator's "Combine PDF Pages" action;
	--confirm that it is still present and runnable on the targeted macOS version
	do shell script "\"/System/Library/Automator/Combine PDF Pages.action/Contents/Resources/join.py\" -o " & quoted form of POSIX path of outputFile & pdfArgs
	
	--delete the individual page PDFs (including a discarded first page)
	tell application "Finder"
		delete every item of thePDFs
	end tell
	
	tell application "Skim"
		open outputFile as alias
	end tell
end combinePDFPages
(* SUB-ROUTINES *) | |
--Merges two lists of equal length by alternating their items: {a1, b1, a2, b2, ...}.
--Raises "Can't interlaceLists: ..." when an argument is not a list or the lengths differ.
on interlaceLists(list1, list2)
	-- HAS (http://applemods.sourceforge.net/mods/Data/List.php)
	local list1, list2
	try
		if class of list1 is not list then error "not a list." number -1704
		if class of list2 is not list then error "not a list." number -1704
		--the lists are held as properties of a script object and accessed through it
		script cache
			property oddItems : list1
			property evenItems : list2
			property merged : {}
		end script
		set pairCount to count of cache's oddItems
		if pairCount is not (count of cache's evenItems) then error "lists are different lengths."
		repeat with n from 1 to pairCount
			set end of cache's merged to item n of cache's oddItems
			set end of cache's merged to item n of cache's evenItems
		end repeat
		return cache's merged
	on error eMsg number eNum
		error "Can't interlaceLists: " & eMsg number eNum
	end try
end interlaceLists
--Splits lst into consecutive sub-lists of groupLen items each.
--When the length of lst is not a multiple of groupLen, the remaining items form a shorter final sub-list.
--Raises "Can't groupList: ..." when lst is not a list (or on any other failure).
on groupList(lst, groupLen)
	-- HAS (http://applemods.sourceforge.net/mods/Data/List.php)
	local lst, tailLen, groupLen, idx
	try
		if class of lst is not list then error "not a list." number -1704
		--the list is held as a property of a script object and accessed through it
		script cache
			property allItems : lst
			property chunks : {}
		end script
		set total to count of cache's allItems
		set tailLen to total mod groupLen
		--full groups first
		repeat with idx from 1 to (total - tailLen) by groupLen
			set end of cache's chunks to items idx thru (idx + groupLen - 1) of cache's allItems
		end repeat
		--then whatever is left over
		if tailLen is not 0 then
			set end of cache's chunks to items -tailLen thru -1 of cache's allItems
		end if
		return cache's chunks
	on error eMsg number eNum
		error "Can't groupList: " & eMsg number eNum
	end try
end groupList
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment