Forked from HalemoGPA/pdf_page_counter_csv_script.py
Created
August 18, 2024 07:16
-
-
Save Rockyspade/7282a5833f6f5c338d2c4c6bafb5f4df to your computer and use it in GitHub Desktop.
PDF Page Counter to CSV (Python)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import csv | |
def count_pdf_pages(file_path):
    """Estimate the number of pages in a PDF by scanning its raw bytes.

    The file is read in binary mode and every ``/Type /Page`` entry is
    counted. Longer names that merely start with ``Page`` (notably the
    page-tree node type ``/Pages``) are not counted. Only markers that
    appear verbatim in the raw bytes are seen by this scan.

    Args:
        file_path: Path of the PDF file to inspect.

    Returns:
        The number of ``/Type /Page`` markers found in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, "rb") as file:
        content = file.read()
    # A negative lookahead (instead of consuming one extra byte) rejects
    # longer names such as /Pages while still matching a marker that is the
    # very last thing in the data.
    return len(re.findall(rb"/Type\s*/Page(?!\w)", content))
def get_pdf_info(directory):
    """Walk *directory* recursively and collect a page count for every PDF.

    Files are selected by their ``.pdf`` extension, compared
    case-insensitively so that names such as ``REPORT.PDF`` are included.
    A file that cannot be processed is reported on stdout and skipped; it
    does not abort the walk.

    Args:
        directory: Root directory to scan.

    Returns:
        A list of ``[folder, name, pages]`` rows, where ``folder`` is the
        containing folder relative to *directory* (``"."`` for the root
        itself), ``name`` is the file name without its 4-character
        extension, and ``pages`` is the value returned by
        ``count_pdf_pages``.
    """
    pdf_info = []
    for root, _, files in os.walk(directory):
        for file in files:
            # Case-insensitive match: ".PDF" and ".Pdf" are PDFs too.
            if not file.lower().endswith(".pdf"):
                continue
            pdf_path = os.path.join(root, file)
            print(f"Reading {pdf_path}")
            try:
                num_pages = count_pdf_pages(pdf_path)
                main_folder = os.path.relpath(root, directory)
                pdf_info.append([main_folder, file[:-4], num_pages])
            except Exception as e:
                # Deliberately broad: one unreadable file must not stop
                # the scan of the remaining files.
                print(f"Could not read {pdf_path}: {e}")
    return pdf_info
def write_to_csv(pdf_info, output_file):
    """Write the collected PDF rows to *output_file* as UTF-8 CSV.

    The file is created or overwritten. A fixed header row is written
    first, followed by one CSV row per entry of *pdf_info*.

    Args:
        pdf_info: Iterable of rows (folder, PDF name, page count).
        output_file: Path of the CSV file to write.
    """
    header = ["Main Folder", "PDF Name", "Number of Pages"]
    # newline="" lets the csv module control line endings itself.
    with open(output_file, mode="w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(header)
        for row in pdf_info:
            out.writerow(row)
if __name__ == "__main__":
    import sys

    # Usage: script.py [directory] [output_file]
    # Without arguments the defaults below are used; change the default
    # path to an appropriate one or pass the directory on the command line.
    # Note: in a raw string a single backslash is already literal, so the
    # path separator must not be doubled.
    directory = sys.argv[1] if len(sys.argv) > 1 else r"D:\Path"
    output_file = sys.argv[2] if len(sys.argv) > 2 else "pdf_info.csv"

    pdf_info = get_pdf_info(directory)
    write_to_csv(pdf_info, output_file)
    print(f"PDF information has been written to {output_file}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
#git(1) Create pull_request merger_status_<script> workflow @hotgazpacho