Created
April 24, 2024 23:35
-
-
Save Snarp/6be842086b31fa82ed557c6d83b90256 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'fileutils' | |
# For use in cleaning scanned book/magazine/etc pages and preparing the scans | |
# for distribution/OCR. | |
# | |
# Assume you've been scanning a book as double-page scans. You can | |
# automatically split them into two files via ImageMagick `mogrify`: | |
# | |
# mogrify -path "./Auto-Halved Pages" -format png -crop 50%x100% +repage "./Double-Page Scans/*.png" | |
# | |
# ...which vertically divides a file named (for example)
# | |
# "./Double-Page Scans/22.png" (pages 22 and 23) | |
# | |
# into two files named | |
# | |
# "./Auto-Halved Pages/22-0.png" (page 22, left) | |
# "./Auto-Halved Pages/22-1.png" (page 23, right) | |
# | |
# The Ruby script below will then correctly rename `22-0.png` to `22.png` and | |
# `22-1.png` to `23.png`. | |
# | |
# WARNING: This will not work on documents that read right-to-left! The
# '-1' and '-0' suffixes will be swapped! (Lazy way to fix this: rename all
# the `*-0.png`s to `*-2.png` or something.)
#
# Gives every halved scan in `src_dir` a sequential page number and returns
# the list of new paths in page order.
#
# src_dir        - directory holding the `<left page>-<0|1><ext>` files.
# dest_dir       - output directory, created if missing; files are always
#                  copied when it is set. Pass nil to renumber inside
#                  `src_dir` (files are then moved unless `copy` is truthy).
# ext            - extension to match and to give the new files; a leading
#                  '.' is added when missing. nil matches `*.*` and keeps
#                  each file's own extension.
# copy           - when `dest_dir` is nil, copy instead of move.
# digits         - minimum width of the zero-padded page number. Defaults to
#                  the width of the left page number in the first file name.
# start_page_num - number given to the first file. Defaults to the left page
#                  number found in the first file name.
# noop           - when true, touch nothing on disk; only return the names.
#
# Raises ArgumentError when `digits`/`start_page_num` have to be inferred but
# no file name contains `<number>-0` or `<number>-1`.
def rename_halved(src_dir='Auto-Halved Pages',
                  dest_dir: 'Correctly-Numbered Pages', # nil okay
                  ext: '.png', # nil okay
                  copy: nil,
                  digits: nil,
                  start_page_num: nil,
                  noop: false)
  ext = ".#{ext}" if ext && !ext.empty? && !ext.start_with?('.')

  # `base:` keeps src_dir out of the glob pattern, so directory names that
  # contain glob metacharacters ("Scans [1985]") still work.
  names = Dir.glob("*#{ext || '.*'}", File::FNM_CASEFOLD, base: src_dir)

  # Order numerically by page and then by half, so that "10-0" follows "8-1"
  # even when the names are not zero-padded. Any numeric half suffix is
  # accepted here so that manually renamed halves (e.g. "-2") keep their
  # place. Names without a "<page>-<half>" part go last, ordered by name.
  names = names.sort_by do |name|
    parts = /(?<page>\d+)-(?<half>\d+)/.match(name)
    parts ? [0, parts[:page].to_i, parts[:half].to_i, name] : [1, 0, 0, name]
  end

  if !names.empty? && !(start_page_num && digits)
    first = File.basename(names.first)
    m = /(?<left_page_num>\d+)-(?<diff>0|1)/.match(first)
    unless m
      raise ArgumentError,
            "cannot infer the page number from #{first.inspect}; " \
            'expected a name containing "<number>-0" or "<number>-1", ' \
            'or pass digits: and start_page_num:'
    end
    digits ||= m[:left_page_num].length
    start_page_num ||= m[:left_page_num].to_i
  end

  FileUtils.mkdir_p(dest_dir) if dest_dir && !noop

  names.each_with_index.map do |name, index|
    old_fname = File.join(src_dir, name)
    page = (start_page_num + index).to_s.rjust(digits, '0')
    new_fname = File.join(dest_dir || src_dir, page + (ext || File.extname(name)))

    unless noop
      if dest_dir || copy
        FileUtils.cp(old_fname, new_fname)
      else
        FileUtils.mv(old_fname, new_fname)
      end
    end

    new_fname
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment