Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save Rockyspade/7282a5833f6f5c338d2c4c6bafb5f4df to your computer and use it in GitHub Desktop.
Save Rockyspade/7282a5833f6f5c338d2c4c6bafb5f4df to your computer and use it in GitHub Desktop.
PDF Page Counter to CSV (Python)
import os
import re
import csv
def count_pdf_pages(file_path):
    """Estimate the page count of a PDF by scanning its raw bytes.

    The whole file is read into memory and every ``/Type /Page`` marker is
    counted. Markers immediately followed by ``s`` (``/Type /Pages``) are
    excluded.

    NOTE(review): this is a byte-level heuristic, not a PDF parser. Markers
    that do not appear as plain bytes in the file (presumably e.g. inside
    compressed streams) are not counted -- confirm this is acceptable for
    the PDFs being processed.

    Args:
        file_path: Path of the PDF file to inspect.

    Returns:
        The number of page markers found (0 when there are none).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, "rb") as file:
        content = file.read()
    # A negative lookahead is used instead of ``[^s]`` so the match does not
    # need (or consume) a byte after "Page": a marker at the very end of the
    # data is still counted and the following byte stays available for the
    # next match.
    return len(re.findall(rb"/Type\s*/Page(?!s)", content))
def get_pdf_info(directory):
    """Collect a page count for every PDF found under *directory*.

    The directory tree is walked recursively with ``os.walk``. A file is
    treated as a PDF when its name ends in ``.pdf``, compared
    case-insensitively so that names such as ``REPORT.PDF`` are included.

    Args:
        directory: Root directory to search.

    Returns:
        A list of ``[folder, name, num_pages]`` rows, where ``folder`` is the
        containing directory relative to *directory*, ``name`` is the file
        name without its 4-character extension, and ``num_pages`` is the
        value returned by ``count_pdf_pages``. Files that raise while being
        processed are reported on stdout and left out of the result.
    """
    pdf_info = []
    for root, _, files in os.walk(directory):
        for file in files:
            # Case-insensitive check: ".PDF" / ".Pdf" are common, notably on
            # Windows file systems.
            if not file.lower().endswith(".pdf"):
                continue
            pdf_path = os.path.join(root, file)
            print(f"Reading {pdf_path}")
            try:
                num_pages = count_pdf_pages(pdf_path)
                main_folder = os.path.relpath(root, directory)
                # file[:-4] strips the ".pdf" suffix, whatever its case.
                pdf_info.append([main_folder, file[:-4], num_pages])
            except Exception as e:
                # Best effort: one unreadable file must not abort the scan.
                print(f"Could not read {pdf_path}: {e}")
    return pdf_info
def write_to_csv(pdf_info, output_file):
    """Write the collected PDF rows to a CSV file.

    The file is created (or overwritten) as UTF-8. A fixed header row is
    written first, followed by one line per entry of *pdf_info*.

    Args:
        pdf_info: Iterable of rows, each a sequence of cell values.
        output_file: Path of the CSV file to write.
    """
    header = ["Main Folder", "PDF Name", "Number of Pages"]
    # newline="" lets the csv module control line endings itself.
    with open(output_file, mode="w", newline="", encoding="utf-8") as handle:
        csv_out = csv.writer(handle)
        csv_out.writerow(header)
        csv_out.writerows(pdf_info)
def main(argv=None):
    """Scan a directory for PDFs and write their page counts to a CSV file.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None``, ``sys.argv[1:]`` is used. The first
            argument, if given, is the directory to scan; the second, if
            given, is the output CSV path. Missing arguments fall back to
            the built-in defaults below.
    """
    # Local import so this block does not depend on the file's import header.
    import sys

    args = sys.argv[1:] if argv is None else list(argv)
    # Change the default path to an appropriate one, or pass the directory as
    # the first command-line argument.
    directory = args[0] if len(args) > 0 else r"D:\\Path"
    output_file = args[1] if len(args) > 1 else "pdf_info.csv"
    pdf_info = get_pdf_info(directory)
    write_to_csv(pdf_info, output_file)
    print(f"PDF information has been written to {output_file}")


if __name__ == "__main__":
    main()
@Rockyspade
Copy link
Author

Rockyspade commented Aug 21, 2024

#git(1) Create pull_request merger_status_<script> workflow @hotgazpacho

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment