Skip to content

Instantly share code, notes, and snippets.

@jschpp
Last active January 13, 2017 08:04
Show Gist options
  • Save jschpp/97f5f26090da08888e0d2fe8bf6b2e6b to your computer and use it in GitHub Desktop.
Save jschpp/97f5f26090da08888e0d2fe8bf6b2e6b to your computer and use it in GitHub Desktop.
Count the total number of pages across all PDF files in a folder. Call with `-r` to also check subdirectories.
"""count pdf pages"""
from __future__ import print_function
import sys
from getopt import GetoptError, getopt
from PyPDF2 import PdfFileReader
try:
from os import scandir
except ImportError:
from scandir import scandir
def get_page_count(paths=None, recursive=False):
    """Return the total page count of all PDF files found under *paths*.

    Keyword arguments:
    paths -- list of directory path strings (default: current directory);
             the caller's list is left unmodified
    recursive -- also search subdirectories (default False)

    A directory entry is treated as a PDF when it is a regular file whose
    name ends in ".pdf" (case-insensitive).
    """
    # Work on a copy: the list doubles as the traversal stack below, and
    # popping from the caller's own list would silently empty it.
    pending = list(paths) if paths else ["."]
    pdffiles = []
    while pending:
        current = pending.pop()
        for direntry in scandir(current):
            if recursive and direntry.is_dir():
                pending.append(direntry.path)
            # is_file must be *called*; the bare bound method is always
            # truthy, which let directories named like "x.pdf" through.
            elif direntry.is_file() and direntry.name.lower().endswith(".pdf"):
                pdffiles.append(direntry.path)
    return sum(get_page_number(filename) for filename in pdffiles)
def get_page_number(filename):
    """Return the number of pages in the PDF file at *filename*."""
    # Hand the open handle (not the path) to the reader so the file opened
    # here is the one that gets parsed, and it is closed when the block ends.
    with open(filename, mode='rb') as pdf_file:
        return PdfFileReader(pdf_file, strict=False).numPages
def main(argv):
    """usage pdfpagecount.py [-r] <path>

    Parse the command-line arguments in *argv* (without the program name)
    and print the summed page count of the PDF files in the given paths.

    Options:
    -h               print the usage line and exit
    -r, --recursive  also search subdirectories

    Exits with status 2 when an unknown option is given.
    """
    usage = 'pdfpagecount.py [-r] <path>'
    recursive = False
    paths = ["."]
    try:
        opts, args = getopt(argv, "hr", ["recursive"])
    except GetoptError:
        print(usage)
        sys.exit(2)
    # getopt yields plain (option, value) tuples; only the option name is
    # needed because neither flag takes an argument.
    for opt, _value in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-r", "--recursive"):
            recursive = True
    if args:
        paths = args
    print(get_page_count(paths, recursive))


if __name__ == "__main__":
    main(sys.argv[1:])
"""pdfpagecount test"""
import unittest
import os
from urllib import urlretrieve
import pdfpagecount
class Test(unittest.TestCase):
    """Tests for pdfpagecount.get_page_count using on-disk fixture folders."""

    def setUp(self):
        r"""Change into the testfiles folder, creating it if it is missing.

        This test case needs a folder structure like this:
        +---pdfpagecount.py
        +---pdfpagecount_test.py
        |
        +---testfiles
        |   \---get_page_count
        |       +---empty
        |       \---full
        |           +---file.pdf
        file.pdf can have an arbitrary name but must contain only one page.

        If the folder cannot be entered, the structure is created and a
        sample PDF is downloaded into the "full" folder.
        """
        # Every test changes the working directory. Restore it afterwards so
        # the next setUp resolves "./testfiles" from the same place instead
        # of from inside a fixture folder.
        self.addCleanup(os.chdir, os.getcwd())
        try:
            os.chdir("./testfiles/get_page_count")
        except OSError:
            os.makedirs("./testfiles/get_page_count")
            os.makedirs("./testfiles/get_page_count/full")
            os.makedirs("./testfiles/get_page_count/empty")
            urlretrieve("https://stlab.adobe.com/wiki/images/d/d3/Test.pdf",
                        "./testfiles/get_page_count/full/file.pdf")
            os.chdir("./testfiles/get_page_count")

    def test_get_page_count_empty(self):
        """Empty folder should contain no pages"""
        os.chdir("./empty")
        self.assertEqual(0, pdfpagecount.get_page_count())

    def test_get_page_count_full(self):
        """Folder contains pdf file with one page"""
        os.chdir("./full")
        self.assertEqual(1, pdfpagecount.get_page_count())

    def test_get_page_count_recursive(self):
        """Recursive search from the fixture root finds the single page"""
        self.assertEqual(1, pdfpagecount.get_page_count(recursive=True))


if __name__ == '__main__':
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment