Skip to content

Instantly share code, notes, and snippets.

@GottZ
Created October 15, 2024 17:23
Show Gist options
  • Save GottZ/a15d53a6236960a46ad8ecf382335765 to your computer and use it in GitHub Desktop.
Save GottZ/a15d53a6236960a46ad8ecf382335765 to your computer and use it in GitHub Desktop.
Take it with a grain of salt. I made this today to check certain quality aspects of the PDFs we received from scanning contractors, and to see how they handle large and small pages compared to classic A4 ones. It is purely a proof of concept; there is nothing refined to see here.
import pdfplumber
import sys
import glob
# PDF user-space units: page and placed-image sizes are divided by 72 to get
# inches, and inches are multiplied by 25.4 to get millimetres.
POINTS_PER_INCH = 72
MM_PER_INCH = 25.4


def color_label(image):
    """Return a short colour label for an image dict.

    The first entry of ``image["colorspace"]`` is converted to a string and
    searched for "RGB", "CMYK" and "Gray" (in that order), yielding "RGB",
    "CMYK" or "BW". Anything else -- including a missing or empty
    ``colorspace`` entry -- yields "unknown" instead of raising.
    """
    spaces = image.get("colorspace")
    if not spaces:
        return "unknown"
    # NOTE(review): pdfplumber appears to report the colorspace as a list;
    # a bare (non-sequence) value is tolerated as well.
    first = spaces[0] if isinstance(spaces, (list, tuple)) else spaces
    name = str(first)
    for needle, label in (("RGB", "RGB"), ("CMYK", "CMYK"), ("Gray", "BW")):
        if needle in name:
            return label
    return "unknown"


def image_dpi(image):
    """Return the ``(horizontal, vertical)`` resolution of a placed image.

    ``image["width"]`` / ``image["height"]`` are the size the image occupies
    on the page (divided by 72 to get inches) and ``image["srcsize"]`` is the
    pair of source dimensions that is divided by that size.

    Returns ``None`` when the placed width or height is not positive, since
    no resolution can be computed for a degenerate placement.
    """
    placed_w = image["width"]
    placed_h = image["height"]
    pixel_w, pixel_h = image["srcsize"]
    if placed_w <= 0 or placed_h <= 0:
        return None
    return (
        pixel_w / (placed_w / POINTS_PER_INCH),
        pixel_h / (placed_h / POINTS_PER_INCH),
    )


def describe_image(image):
    """Format one image as ``"<dpi>"`` or ``"<dpi> <color>"``.

    The DPI is a single number when both axes round to the same value,
    ``"<w>x<h>"`` otherwise, and ``"n/a"`` when it cannot be computed.
    The colour label is appended only when it is not "RGB".
    """
    dpi = image_dpi(image)
    if dpi is None:
        text = "n/a"
    else:
        horizontal, vertical = round(dpi[0]), round(dpi[1])
        if horizontal == vertical:
            text = f"{horizontal}"
        else:
            text = f"{horizontal}x{vertical}"
    color = color_label(image)
    if color != "RGB":
        text += f" {color}"
    return text


def describe_page(page):
    """Return the one-line report for a page.

    ``page`` must expose ``page_number``, ``width``, ``height`` and
    ``images`` (a sequence of image dicts). Pages without images get a
    fixed notice; otherwise the line holds the page size in millimetres
    followed by a comma-separated description of every image.
    """
    number = page.page_number
    images = page.images
    if not images:
        return f"{number}: does not contain a scanned image"
    width_mm = round(page.width / POINTS_PER_INCH * MM_PER_INCH)
    height_mm = round(page.height / POINTS_PER_INCH * MM_PER_INCH)
    details = ", ".join(describe_image(image) for image in images)
    return f"{number} {width_mm}x{height_mm}mm DPI: {details}"


def main(argv=None):
    """Report image resolution per page for every PDF matched by ``argv``.

    Args:
        argv: Glob patterns / file names; defaults to ``sys.argv[1:]``.

    Exits with status 1 after printing a usage line when no argument is
    given.
    """
    patterns = sys.argv[1:] if argv is None else argv
    if not patterns:
        print("Usage: pdfdpi.py <file1> <file2> ...")
        sys.exit(1)

    # Expand wildcards ourselves: some shells (e.g. cmd.exe) pass them
    # through to the program unexpanded.
    files = []
    for pattern in patterns:
        files.extend(glob.glob(pattern))

    for fname in files:
        with pdfplumber.open(fname) as pdf:
            print(f"File: '{fname}' with {len(pdf.pages)} pages")
            for page in pdf.pages:
                print(describe_page(page))

    print("Press any key to exit")
    try:
        input()
    except EOFError:
        # stdin is closed or redirected; there is nobody to wait for.
        pass


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment