Skip to content

Instantly share code, notes, and snippets.

@gdlmx
Last active June 24, 2024 06:10
Show Gist options
  • Save gdlmx/33b53406efd769c9464a105bdc350eb8 to your computer and use it in GitHub Desktop.
Save gdlmx/33b53406efd769c9464a105bdc350eb8 to your computer and use it in GitHub Desktop.
Split a PDF file into chunks of pages
[build-system]
requires = ["hatchling>=1.24.2", "hatch-vcs>=0.3.0"]
build-backend = "hatchling.build"
[project]
name = "pdfsplit"
description = "Split a PDF file into chunks of pages"
readme = "README.md"
license = "MIT"
requires-python = ">=3.11"
version = "0.0.1"
keywords = []
authors = []
classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation :: CPython",
]
dependencies = [
"pypdf"
]
[tool.hatch.build.targets.sdist]
exclude = [
"/.github",
"/backend",
"/scripts",
]
import sys
from pypdf import PdfWriter, PdfReader
try:
    # Available in the standard library from Python 3.12 onwards.
    from itertools import batched
except ImportError:
    from itertools import islice

    def batched(iterable, n=10):
        """Yield successive tuples of up to ``n`` items from ``iterable``.

        Fallback for interpreters without ``itertools.batched``. It accepts
        any iterable (including generators and other objects that support
        neither ``len()`` nor slicing) and yields tuples, like the
        standard-library version. The last batch may be shorter than ``n``.

        Args:
            iterable: Any iterable of items to group.
            n: Maximum number of items per batch; defaults to 10.

        Raises:
            ValueError: If ``n`` is smaller than one. Because this is a
                generator, the error surfaces when iteration starts.
        """
        if n < 1:
            raise ValueError("n must be at least one")
        iterator = iter(iterable)
        # islice consumes at most n items per round; an empty tuple means
        # the underlying iterator is exhausted.
        while chunk := tuple(islice(iterator, n)):
            yield chunk
def main():
    """Split a PDF into consecutive files of at most ``n`` pages each.

    Expects exactly two command-line arguments: the path of the input PDF
    and the number of pages per chunk. Each chunk is written to the current
    working directory as ``document-page<i>.pdf``, where ``<i>`` is the
    zero-based chunk index.

    Raises:
        SystemExit: If the arguments are missing, or the page count is not
            a positive integer.
    """
    args = sys.argv[1:]
    if len(args) != 2:
        sys.exit("usage: pdfsplit <input.pdf> <pages-per-chunk>")
    fname, n = args
    try:
        n = int(n)
    except ValueError:
        sys.exit("pages-per-chunk must be an integer, got %r" % n)
    if n < 1:
        sys.exit("pages-per-chunk must be at least 1, got %d" % n)

    # All chunks are written while the input file is still open.
    with open(fname, "rb") as f_pdf_in:
        inputpdf = PdfReader(f_pdf_in)
        for i, pages in enumerate(batched(inputpdf.pages, n)):
            output = PdfWriter()
            for p in pages:
                output.add_page(p)
            with open("document-page%s.pdf" % i, "wb") as outputStream:
                output.write(outputStream)


# Run the splitter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment