Last active
December 25, 2022 23:57
-
-
Save adamori/76abb33114b7af21ccaf921e3ec2937e to your computer and use it in GitHub Desktop.
This script regularly checks a specified folder for new PDF files, extracts certain data from them, and sends the PDF files to a specified email address with the extracted data included in the subject and body of the email message. https://youtu.be/UnhiEivi0J0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Mail settings ------------------------------------------------------------
# SMTP host, the account used to log in, and the mailbox that receives the PDFs.
# NOTE(review): the password is kept in plain text in this file; fill in the
# placeholder locally and avoid committing a real secret.
$smtpServer = "smtp.gmail.com"
$username = "[email protected]"
$password = "password"
$sendTo = "[email protected]"

# PSCredential only accepts a SecureString, so convert the plain-text password
# first and then pair it with the user name.
$securePassword = ConvertTo-SecureString -String $password -AsPlainText -Force
$credential = New-Object -TypeName System.Management.Automation.PSCredential -ArgumentList $username, $securePassword

# Load the iTextSharp assembly from its fixed location so the
# iTextSharp.text.pdf types are available to the rest of the script.
$itextsharpdestination = "C:\Users\flame\Desktop\ps_pdf\itextsharp.dll"
Add-Type -Path $itextsharpdestination
# Ask the user for the folder to scan. The first answer plus up to $maxRetries
# further answers are accepted; if none of them names an existing directory
# the script terminates with exit code 1.
$promptText = "Enter the path of the folder to check for new PDF files"
$maxRetries = 3
$attempts = 0
$folderPath = Read-Host $promptText
# An empty answer is rejected before Test-Path sees it, because Test-Path
# cannot bind an empty string to -Path.
while ([string]::IsNullOrEmpty($folderPath) -or !(Test-Path -Path $folderPath -PathType Container)) {
    # Check the retry budget only while the current answer is still invalid,
    # so a valid final answer is never reported as a failure.
    if ($attempts -ge $maxRetries) {
        Write-Output "Too many attempts. Exiting script."
        # 'break' would only leave this loop and let the rest of the script
        # run against an invalid folder; stop the script instead.
        exit 1
    }
    Write-Output "Invalid folder path. Please try again."
    $folderPath = Read-Host $promptText
    $attempts++
}
# Lists the PDF files in a folder that have not been sent yet.
#   $folderPath - directory to scan (top level only). The names of files that
#                 were already sent are read from "$folderPath\sent_pdfs.txt",
#                 one name per line.
# Output: the file objects from Get-ChildItem whose Name is not in that list;
#         nothing when every PDF is already listed.
function CheckForNewPdfs($folderPath) {
    $sentListPath = "$folderPath\sent_pdfs.txt"

    # A missing list file simply means nothing has been sent so far; treat it
    # as an empty list instead of letting Get-Content report an error.
    $sentPdfFileNames = @()
    if (Test-Path -Path $sentListPath -PathType Leaf) {
        $sentPdfFileNames = @(Get-Content $sentListPath)
    }

    # Keep real files only (a directory named "something.pdf" also matches the
    # filter) and drop every name that is already in the sent list.
    $newPdfFiles = Get-ChildItem $folderPath -Filter "*.pdf" |
        Where-Object { (-not $_.PSIsContainer) -and ($sentPdfFileNames -notcontains $_.Name) }

    return $newPdfFiles
}
# Extracts the invoice fields from the text of a PDF page.
#   $pdfContent - the text to search.
# Returns a hashtable with the keys Buyer, Company, CustomerCode,
# PaymentMethod, InvoiceNumber and TotalAmount. Every value is the trimmed
# text of the first match, or an empty string when the pattern is not found.
function ParsePdfData($pdfContent) {
    # Pass the option as the enum value instead of relying on the string "m"
    # being converted to RegexOptions. Multiline makes ^ and $ match at line
    # boundaries, which every pattern below depends on.
    $multiline = [System.Text.RegularExpressions.RegexOptions]::Multiline

    # Buyer: the rest of the line after the "ПОКУПАТЕЛЬ:" label.
    $buyer = [regex]::Match($pdfContent, 'ПОКУПАТЕЛЬ: +(.+$)', $multiline).Groups[1].Value

    # Company: the first line fragment that starts with "ООО" or "ЗАО".
    $company = [regex]::Match($pdfContent, '(ООО|ЗАО).+$', $multiline).Value

    # Customer code: whatever follows the "Код клиента: " label.
    $customerCode = [regex]::Match($pdfContent, 'Код клиента: (.*)', $multiline).Groups[1].Value

    # Payment method and invoice number come from the line below the
    # "МЕТОД ОПЛАТЫ" header: group 3 is the leading non-digit text, group 4 the
    # first run of digits. One match supplies both values.
    $paymentMatch = [regex]::Match($pdfContent, '((МЕТОД ОПЛАТЫ).+$\n^(\D*))(\d+)', $multiline)
    $paymentMethod = $paymentMatch.Groups[3].Value
    $invoiceNumber = $paymentMatch.Groups[4].Value

    # Total: the digits after "ИТОГО".
    # NOTE(review): only the leading digits are captured, so a fractional part
    # would be dropped - confirm against the real invoice layout.
    $totalAmount = [regex]::Match($pdfContent, 'ИТОГО +(\d+)', $multiline).Groups[1].Value

    return @{
        Buyer         = $buyer.Trim()
        Company       = $company.Trim()
        CustomerCode  = $customerCode.Trim()
        PaymentMethod = $paymentMethod.Trim()
        InvoiceNumber = $invoiceNumber.Trim()
        TotalAmount   = $totalAmount.Trim()
    }
}
# Mails one PDF together with the values extracted from it.
#   $data           - hashtable read for the keys Buyer, Company, CustomerCode,
#                     PaymentMethod, InvoiceNumber and TotalAmount.
#   $attachmentPath - path of the file to attach.
# Reads the script-level variables $smtpServer, $credential, $sendTo and
# $username (the login name doubles as the sender address).
function SendEmail($data, $attachmentPath) {
    # The body is HTML: the pieces are concatenated as-is, and payment method
    # and invoice number share one line.
    $bodyParts = @(
        "Ostja: $($data["Buyer"])<br>"
        "Ettevõte: $($data["Company"])<br>"
        "Kliendi kood: $($data["CustomerCode"])<br>"
        "Maksmise viis: $($data["PaymentMethod"])"
        " Arve number: $($data["InvoiceNumber"])<br>"
        "Kokku: $($data["TotalAmount"]) euro"
    )

    # Collect the parameters in one table and splat them onto the cmdlet.
    $mailArgs = @{
        SmtpServer  = $smtpServer
        Credential  = $credential
        To          = $sendTo
        From        = $username
        Subject     = "$($data["Company"]), $($data["TotalAmount"]) euro"
        Body        = -join $bodyParts
        BodyAsHtml  = $true
        Attachments = $attachmentPath
        UseSsl      = $true
        Encoding    = "utf-8"
    }
    Send-MailMessage @mailArgs
}
# --- Main flow ----------------------------------------------------------------
# Find the PDFs that are not yet listed in sent_pdfs.txt, mail each one, and
# record a file name in the list only after its mail went out.
$sentListPath = "$folderPath\sent_pdfs.txt"
if (!(Test-Path $sentListPath)) {
    # Out-Null keeps the created file object out of the script's output.
    New-Item -Path $sentListPath -ItemType File | Out-Null
}
$newPdfs = CheckForNewPdfs($folderPath)
$sentMails = 0

# Send-MailMessage reports failures as non-terminating errors. Turn errors into
# terminating ones for the duration of the loop so a failed read or send is
# caught below instead of being recorded as "sent".
$previousErrorPreference = $ErrorActionPreference
$ErrorActionPreference = "Stop"
try {
    foreach ($pdfFile in $newPdfs) {
        $pdfFullPath = $pdfFile.FullName
        try {
            # Load the PDF with iTextSharp and pull the text of page 1.
            $reader = New-Object iTextSharp.text.pdf.PdfReader($pdfFullPath)
            try {
                $pdfContent = [iTextSharp.text.pdf.parser.PdfTextExtractor]::GetTextFromPage($reader, 1)
            } finally {
                # Release the file even when extraction throws.
                $reader.Close()
            }
            $parsed = ParsePdfData($pdfContent)
            SendEmail -data $parsed -attachmentPath $pdfFullPath
        } catch {
            # Leave the file out of sent_pdfs.txt so the next run retries it.
            Write-Warning "Could not process '$($pdfFile.Name)': $($_.Exception.Message)"
            continue
        }
        # Remember the file name so this PDF is skipped on later runs.
        $pdfFile.Name | Out-File -FilePath $sentListPath -Append
        $sentMails += 1
    }
} finally {
    $ErrorActionPreference = $previousErrorPreference
}

if (!$newPdfs) {
    Write-Output "New pdf documents did not found"
} else {
    Write-Output "Sent $sentMails mails"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
YouTube link: https://youtu.be/UnhiEivi0J0