Created
June 24, 2024 12:33
-
-
Save tjarksaul/4089bda3512a87fedf915a2f655a5a48 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import subprocess | |
import tempfile | |
import sys | |
import shutil | |
import glob | |
import requests | |
from datetime import datetime | |
# Local directory the generated PDFs are written to. The trailing slash is
# required because paths are built by plain string concatenation.
OUT_DIR = '/tmp/scan-inbox/'
# Directory that receives a copy of the un-OCRed PDF (trailing slash required,
# same reason as above).
PAPERLESS_DIR = '/docker/paperless/consume/'
# WebDAV folder the final PDF is uploaded to, plus the credentials used for it.
# Each value can be overridden through an environment variable of the same
# name so that real credentials do not have to live in the source file; the
# literals below remain the defaults. The URL is normalised to end in exactly
# one '/' because the remote file name is appended directly to it.
NEXTCLOUD_URL = os.environ.get(
    'NEXTCLOUD_URL',
    'https://cloud.cloud/remote.php/dav/files/admin/Scan-Inbox/',
).rstrip('/') + '/'
NEXTCLOUD_USER = os.environ.get('NEXTCLOUD_USER', 'nextcloud')
NEXTCLOUD_PASSWORD = os.environ.get('NEXTCLOUD_PASSWORD', 'nextcloud')
def upload(path: str, fname: str, timeout: float = 120.0) -> None:
    """Upload a local file to the Nextcloud WebDAV folder with an HTTP PUT.

    Args:
        path: Directory that contains the file to upload.
        fname: Name of the file inside ``path``. The same name is appended to
            ``NEXTCLOUD_URL`` to form the remote URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Raises:
        OSError: If the local file cannot be opened.
        requests.RequestException: If the request fails or the server
            answers with an HTTP error status.
    """
    print("Uploading...")
    headers = {'Content-type': 'application/octet-stream'}
    url = f"{NEXTCLOUD_URL}{fname}"
    auth = (NEXTCLOUD_USER, NEXTCLOUD_PASSWORD)
    # The context manager closes the file handle even if the request raises.
    with open(os.path.join(path, fname), 'rb') as source:
        response = requests.put(url, data=source, headers=headers, auth=auth,
                                timeout=timeout)
    # Surface 4xx/5xx answers instead of silently treating them as success.
    response.raise_for_status()
# Scan pipeline: scan a batch of pages, merge them into one PDF, hand a copy
# of the raw PDF to the Paperless directory, OCR it and upload the OCRed
# result.

# Make sure the output directory exists before anything is written to it.
os.makedirs(OUT_DIR, exist_ok=True)

# All intermediate TIFF files live in a temporary directory that is removed
# when the block is left (including on sys.exit).
with tempfile.TemporaryDirectory() as tmpdir:
    # The tools below are called with relative file names, so run them from
    # inside the temporary directory.
    os.chdir(tmpdir)

    print("Scanning...")
    try:
        subprocess.run(['/usr/bin/scanimage',
                        '--resolution', '300',
                        '--source', 'ADF Duplex',
                        '--ald=yes',
                        '--swcrop=yes',
                        '--swskip', '10',
                        '--batch=scan_%03d.tif', '--format=tiff',
                        '--mode', 'Color',
                        '--device-name', 'fujitsu:ScanSnap S1500:2853',
                        '-y', '297', '-x', '210',
                        '--page-width', '210', '--page-height', '297',
                        '--sleeptimer', '1'], check=True)
    except subprocess.CalledProcessError as e:
        # Exit status 7 is treated as "feeder empty", which is not an error.
        if e.returncode == 7:
            print("No document in the paper feed, exiting.")
            sys.exit(0)
        print("Unknown error in scanimage, exiting")
        sys.exit(e.returncode)

    # glob.glob() gives no ordering guarantee; sort so the pages are merged in
    # scan order (the zero-padded batch counter makes name order == page order).
    pages = sorted(glob.glob('scan_*.tif'))
    if not pages:
        # Presumably possible when every page is dropped as blank; without
        # this guard tiffcp would fail with a confusing usage error.
        print("scanimage produced no pages, exiting.")
        sys.exit(1)

    print("Converting to PDF...")
    subprocess.run(['/usr/bin/tiffcp', *pages, 'output.tif'], check=True)

    fname_prefix = f"scan_{datetime.now().strftime('%Y%m%d-%H%M%S')}"
    fname = f"{fname_prefix}_src.pdf"
    output_pdf = f"{OUT_DIR}{fname}"
    # tiff2pdf writes the PDF to stdout here, so redirect it into the file.
    with open(output_pdf, 'wb') as f:
        subprocess.run(['/usr/bin/tiff2pdf', 'output.tif', '-j', '-q', '60', '-p', 'A4'],
                       stdout=f, check=True)

    # The Paperless directory gets the un-OCRed PDF under the final file name.
    paperless_file = f"{PAPERLESS_DIR}{fname_prefix}.pdf"
    shutil.copy(output_pdf, paperless_file)

    final_fname = f"{fname_prefix}.pdf"
    final_pdf = f"{OUT_DIR}{final_fname}"
    subprocess.run(['/usr/bin/ocrmypdf',
                    '-r', '-d', '--remove-background',
                    '-l', 'deu+eng',
                    output_pdf, final_pdf], check=True)

    # Only the OCRed PDF is uploaded.
    upload(OUT_DIR, final_fname)
    print("Done, cleaning up. Bye. 👋")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment