Last active
July 3, 2021 09:00
-
-
Save GluTbl/99a0571f61e82792ff4508e1084cc800 to your computer and use it in GitHub Desktop.
[Pdf Eroder] #python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import shutil | |
import sys | |
import traceback | |
import uuid | |
import time | |
import cv2 | |
import numpy as np | |
from fpdf import FPDF | |
from pdf2image import convert_from_path, pdfinfo_from_path | |
from tqdm import tqdm | |
# --- Command-line handling ---------------------------------------------------
# The script takes one argument: the path of the PDF file to process.
# Every rejection below exits with a non-zero status so that callers can
# tell a failed run from a successful one.
try:
    pdf_path: str = sys.argv[1].strip()
except IndexError:
    # No argument was supplied on the command line.
    print("Imvalid argument found in python")
    sys.exit(1)
print(f"Prcessing :{pdf_path}")

if not pdf_path.endswith(".pdf"):
    print("The given file is not pdf")
    sys.exit(1)
if not os.path.isfile(pdf_path):
    print(f"No such path \"{pdf_path}\"")
    sys.exit(1)

# Directory part of the input path; the temporary folder and the generated
# PDFs are created inside it.
WORKING = os.path.split(pdf_path)[0]
############################# | |
def show_list_progress(
    the_list,
    interactive: bool = True,
    unit: str = "file",
    description: str = None,
):
    """Optionally wrap an iterable in a tqdm progress bar.

    Args:
        the_list: The iterable to walk over.
        interactive: When false, ``the_list`` is returned untouched and no
            progress bar is created.
        unit: Unit label passed to tqdm.
        description: Text passed to tqdm as the bar description.

    Returns:
        ``the_list`` itself, or a ``tqdm`` wrapper around it.
    """
    if interactive:
        progress_options = {
            "dynamic_ncols": True,
            "unit": unit,
            "desc": description,
            "bar_format": "{l_bar}{bar}|[{elapsed}<{remaining}, {rate_fmt}]",
        }
        return tqdm(the_list, **progress_options)
    return the_list
def get_file_in_subfolder(path):
    """Return the paths of the regular files located directly inside ``path``.

    Sub-directories are skipped and are not descended into. Each returned
    entry is ``path`` joined with the file name, in ``os.listdir`` order.
    """
    found = []
    for entry_name in os.listdir(path):
        candidate = os.path.join(path, entry_name)
        if os.path.isfile(candidate):
            found.append(candidate)
    return found
############################# | |
# Scratch directory for the intermediate page images. It lives inside WORKING
# and gets a random uuid4-based name.
tmp_folder_name = "tmp_" + str(uuid.uuid4())
TMP_FOLDER = os.path.join(WORKING, tmp_folder_name)

# Create the folder directly and react to a clash, instead of checking first:
# this closes the gap between the check and the creation. os.makedirs raises
# FileExistsError whether the existing entry is a file or a directory.
try:
    os.makedirs(TMP_FOLDER)
except FileExistsError:
    print(f'"{TMP_FOLDER}" is already exist!!!')
    # Non-zero status: this is an error exit, not a successful run.
    sys.exit(1)
###################### | |
# Render the PDF to JPEG files, PAGE_CHUNK pages per convert_from_path call.
# Each image is saved in TMP_FOLDER as "<page number>.jpg" (1-based).
info = pdfinfo_from_path(pdf_path, userpw=None, poppler_path=None)
maxPages = info["Pages"]
PAGE_CHUNK = 2
chunk_starts = range(1, maxPages + 1, PAGE_CHUNK)
for page in show_list_progress(chunk_starts, description="Spliting pages to images", unit="page"):
    # The last chunk may be shorter than PAGE_CHUNK.
    last_in_chunk = min(page + PAGE_CHUNK - 1, maxPages)
    images_chunk = convert_from_path(pdf_path, dpi=600, first_page=page, last_page=last_in_chunk)
    # Number the images starting from the first page of this chunk.
    for count, image in enumerate(images_chunk, start=page):
        target = os.path.join(TMP_FOLDER, f'{count}.jpg')
        image.save(target, 'JPEG')
############################# | |
# Erode every rendered page image and turn it into a pure black/white image.
# Results are written to TMP_FOLDER/eroded under the same file names.
eroded_folder = os.path.join(TMP_FOLDER, "eroded")
os.makedirs(eroded_folder)
# get_file_in_subfolder returns files only, so the "eroded" directory that was
# just created is not part of the list.
iamge_list = get_file_in_subfolder(TMP_FOLDER)

# Loop invariants: built once instead of once per image.
kernel = np.ones((2, 2), np.uint8)
thresh = 128

for img in show_list_progress(iamge_list, unit="image", description="eroding emages..."):
    out = os.path.join(eroded_folder, os.path.split(img)[1])
    img_grey = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
    if img_grey is None:
        # cv2.imread reports an unreadable file by returning None rather than
        # raising; stop here with a clear message instead of failing inside
        # cv2.erode.
        raise RuntimeError(f"Could not read image \"{img}\"")
    img_erosion = cv2.erode(img_grey, kernel, iterations=4)
    # Binarise: the result of cv2.threshold with THRESH_BINARY is saved.
    img_binary = cv2.threshold(img_erosion, thresh, 255, cv2.THRESH_BINARY)[1]
    cv2.imwrite(out, img_binary)
def _write_images_pdf(image_paths, output_path):
    """Write one A4 portrait page per entry of ``image_paths`` to ``output_path``.

    A ``None`` entry produces a blank page; any other entry is an image path
    that is drawn over the whole 210 x 297 mm page.
    """
    pdf = FPDF('P', 'mm', 'A4')
    for image_path in image_paths:
        pdf.add_page()
        if image_path is not None:
            pdf.image(image_path, 0, 0, 210, 297)
    print(f"write images: {output_path}")
    pdf.output(output_path, "F")


def make_pdf_file(filename_list, parent_image_folder, original_pdf: str):
    """Assemble the processed page images into three PDF files in WORKING.

    The page images are expected to be named ``1.jpg`` .. ``N.jpg`` inside
    ``parent_image_folder``, where ``N`` is ``len(filename_list)``; only the
    length of ``filename_list`` is used.

    Files written (names derive from the base name of ``original_pdf``):
      * ``<name>_eroded_pdf_file.pdf``: all pages in order.
      * ``<name>_eroded_pdf_file_odd_actual_even_reversed_second.pdf``: the
        odd-numbered pages in reverse order.
      * ``<name>_eroded_pdf_file_even_actiual_odd_first.pdf``: the
        even-numbered pages in order, followed by one blank page when ``N``
        is odd so that both halves have the same page count.
    NOTE(review): the odd/even split looks intended for manual double-sided
    printing; confirm with the author.

    Args:
        filename_list: Collection whose length gives the number of pages.
        parent_image_folder: Folder containing the numbered ``.jpg`` files.
        original_pdf: Path of the source PDF; only its base name is used.

    Returns:
        None. If the full PDF already exists, "Already printed" is printed
        and nothing is written. The function returns instead of calling
        ``sys.exit()`` so that the caller's cleanup of the temporary folder
        still runs.
    """
    suffix = "_eroded_pdf_file"
    base_name = os.path.split(original_pdf)[1]
    pdf_file = os.path.join(WORKING, base_name + f"{suffix}.pdf")
    odd_file = os.path.join(WORKING, base_name + f"{suffix}_odd_actual_even_reversed_second.pdf")
    even_file = os.path.join(WORKING, base_name + f"{suffix}_even_actiual_odd_first.pdf")
    if os.path.isfile(pdf_file):
        print("Already printed")
        return

    page_images = [
        os.path.join(parent_image_folder, str(num)) + ".jpg"
        for num in range(1, len(filename_list) + 1)
    ]
    # Pages are numbered from 1, so index 0, 2, 4, ... are the odd pages.
    odd_file_paths = page_images[0::2]
    even_file_paths = page_images[1::2]

    _write_images_pdf(page_images, pdf_file)

    # With 1..N numbering the odd list is either as long as the even list or
    # exactly one longer; pad the even list with a blank page in that case.
    if len(odd_file_paths) > len(even_file_paths):
        even_file_paths.append(None)

    _write_images_pdf(odd_file_paths[::-1], odd_file)
    _write_images_pdf(even_file_paths, even_file)
# Build the output PDFs, then always remove the temporary folder.
try:
    make_pdf_file(get_file_in_subfolder(eroded_folder), eroded_folder, pdf_path)
except Exception:
    # Best effort: report the failure but still fall through to the cleanup.
    traceback.print_exc()
finally:
    # Runs for every way out of the try block, including SystemExit, so the
    # page images are not left behind on disk.
    # NOTE(review): the 2 s pause presumably lets pending file handles be
    # released before deletion; confirm whether it is still needed.
    time.sleep(2)
    shutil.rmtree(TMP_FOLDER)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Wrapper around pdf_eroder.py.
# Usage: <this script> <pdf file>
# Resolves the argument to an existing file (first relative to $PWD, then as
# given) and passes it to ./pdf_eroder.py. Every error path exits with a
# non-zero status.
# NOTE: pdf_eroder.py is looked up in the current directory, so the script
# must be run from the folder that contains it.

if test "$#" -ne 1; then
    echo "Illegal number of parameters"
    exit 1
fi

# Reject an argument that consists of digits only.
re='^[0-9]+$'
if [[ $1 =~ $re ]] ; then
    echo "Please pass a pdf file path!!!"
    exit 1
fi

# Start from an empty value so a real_pdf variable inherited from the
# environment cannot be mistaken for a resolved path.
real_pdf=""
FILE=$PWD/$1
if test -f "$FILE"; then
    real_pdf=$FILE
elif test -f "$1"; then
    real_pdf=$1
fi

if [[ -z "${real_pdf}" ]]; then
    echo "$1 doesnot exist"
    exit 1
else
    echo "$real_pdf"
    python3 ./pdf_eroder.py "$real_pdf"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment