Created
October 15, 2024 17:23
-
-
Save GottZ/a15d53a6236960a46ad8ecf382335765 to your computer and use it in GitHub Desktop.
Take it with a grain of salt. I made this today to figure out certain quality aspects of PDFs we received from scanning contractors, and to see how they handle big and small pages compared to classic A4 ones. It is purely a proof of concept; nothing refined to see here.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob
import sys

import pdfplumber
# Report the effective scan resolution (DPI) of every image on every page of
# the PDF files named on the command line.

POINTS_PER_INCH = 72  # page and image placement sizes are divided by this
MM_PER_INCH = 25.4

# Substring searched for in the colour-space name -> label used in the report.
# Order matters: the first matching entry wins (RGB, then CMYK, then Gray).
COLOR_LABELS = (("RGB", "RGB"), ("CMYK", "CMYK"), ("Gray", "BW"))


def expand_patterns(patterns):
    """Expand each command-line argument with ``glob.glob``.

    Args:
        patterns: iterable of file names or glob patterns.

    Returns:
        A flat list of matching file names, in argument order.

    An argument that matches nothing is reported on stderr instead of being
    dropped silently, so a mistyped file name does not go unnoticed.
    """
    files = []
    for pattern in patterns:
        matches = glob.glob(pattern)
        if not matches:
            print(f"No files match '{pattern}'", file=sys.stderr)
        files.extend(matches)
    return files


def color_label(image):
    """Return "RGB", "CMYK", "BW" or "unknown" for one image dict.

    Only the first entry of ``image["colorspace"]`` is inspected. A missing
    or empty colour space yields "unknown" rather than raising.
    """
    spaces = image.get("colorspace")
    if not spaces:
        return "unknown"
    name = str(spaces[0])
    for needle, label in COLOR_LABELS:
        if needle in name:
            return label
    return "unknown"


def dpi_label(image):
    """Return the resolution of one image as "300" or "300x150".

    The resolution is the source pixel count (``srcsize``) divided by the
    placed size in inches (``width`` / ``height`` divided by 72). A single
    number is returned when both axes round to the same value.
    """
    width_pt = image["width"]
    height_pt = image["height"]
    src_width, src_height = image["srcsize"]
    if not width_pt or not height_pt:
        # A zero-sized placement has no meaningful resolution.
        return "n/a"
    dpi_x = round(src_width / (width_pt / POINTS_PER_INCH))
    dpi_y = round(src_height / (height_pt / POINTS_PER_INCH))
    if dpi_x == dpi_y:
        return str(dpi_x)
    return f"{dpi_x}x{dpi_y}"


def describe_image(image):
    """Return the report entry for one image: DPI plus colour if not RGB."""
    entry = dpi_label(image)
    color = color_label(image)
    if color != "RGB":
        # RGB is treated as the default and therefore not printed.
        entry += f" {color}"
    return entry


def format_page_line(page_number, width_pt, height_pt, images):
    """Build the report line for one page.

    Args:
        page_number: number printed at the start of the line.
        width_pt, height_pt: page size; converted to millimetres via 72
            units per inch.
        images: sequence of image dicts with "width", "height", "srcsize"
            and optionally "colorspace".

    Returns:
        The line to print for this page.
    """
    if not images:
        return f"{page_number}: does not contain a scanned image"
    width_mm = round(width_pt / POINTS_PER_INCH * MM_PER_INCH)
    height_mm = round(height_pt / POINTS_PER_INCH * MM_PER_INCH)
    entries = ", ".join(describe_image(image) for image in images)
    return f"{page_number} {width_mm}x{height_mm}mm DPI: {entries}"


def main(argv=None):
    """Print a DPI report for every PDF given on the command line.

    Args:
        argv: arguments to use instead of ``sys.argv[1:]``.

    Exits with status 1 after printing a usage message when no arguments
    are given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print("Usage: pdfdpi.py <file1> <file2> ...")
        sys.exit(1)

    for fname in expand_patterns(args):
        with pdfplumber.open(fname) as pdf:
            print(f"File: '{fname}' with {len(pdf.pages)} pages")
            for page in pdf.pages:
                print(
                    format_page_line(
                        page.page_number, page.width, page.height, page.images
                    )
                )

    # Pause before returning so the report can be read first.
    print("Press any key to exit")
    try:
        input()
    except EOFError:
        # stdin is closed or redirected: there is nothing to wait for.
        pass


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment