Skip to content

Instantly share code, notes, and snippets.

@adamori
Last active December 25, 2022 23:57
Show Gist options
  • Save adamori/76abb33114b7af21ccaf921e3ec2937e to your computer and use it in GitHub Desktop.
Save adamori/76abb33114b7af21ccaf921e3ec2937e to your computer and use it in GitHub Desktop.
This script regularly checks a specified folder for new PDF files, extracts certain data from them, and sends the PDF files to a specified email address with the extracted data included in the subject and body of the email message. https://youtu.be/UnhiEivi0J0
# Mail server, sender account and recipient used when sending the PDFs.
# NOTE(review): the password is kept in plain text in this script - consider
# loading it from a protected source instead.
$smtpServer = "smtp.gmail.com"
$username = "[email protected]"
$password = "password"
$sendTo = "[email protected]"
# Convert the plain-text password to a SecureString, then wrap it together with
# the user name in the PSCredential object that is passed to Send-MailMessage.
$securePassword = ConvertTo-SecureString -String $password -AsPlainText -Force
$credential = New-Object -TypeName System.Management.Automation.PSCredential -ArgumentList $username, $securePassword
# Load the iTextSharp assembly; its PdfReader / PdfTextExtractor types are used
# further down to read text from the PDF files.
$itextsharpdestination = "C:\Users\flame\Desktop\ps_pdf\itextsharp.dll"
Add-Type -Path $itextsharpdestination
# Ask the user for the folder to scan and keep asking until an existing
# directory is entered. After the initial answer the user gets $maxRetries
# further tries; every answer (including the last one) is validated before the
# script gives up.
$folderPrompt = "Enter the path of the folder to check for new PDF files"
$maxRetries = 3
$folderPath = Read-Host $folderPrompt
$attempts = 0
# The empty-string guard keeps Test-Path from raising a parameter-binding error
# when the user just presses Enter.
while ([string]::IsNullOrEmpty($folderPath) -or !(Test-Path -Path $folderPath -PathType Container)) {
    if ($attempts -ge $maxRetries) {
        Write-Output "Too many attempts. Exiting script."
        # Actually stop here: continuing would run the rest of the script
        # against a folder that does not exist.
        exit 1
    }
    Write-Output "Invalid folder path. Please try again."
    $folderPath = Read-Host $folderPrompt
    $attempts++
}
# Returns the PDF files in $folderPath that are not yet listed in the
# "sent_pdfs.txt" tracking file stored in the same folder.
#
# Parameters:
#   $folderPath - directory to scan (not recursive).
# Returns:
#   The file objects produced by Get-ChildItem for every new PDF; nothing when
#   there are none.
function CheckForNewPdfs($folderPath) {
    $sentListPath = Join-Path -Path $folderPath -ChildPath "sent_pdfs.txt"

    # A missing tracking file simply means nothing has been sent yet.
    $sentPdfFileNames = @()
    if (Test-Path -Path $sentListPath -PathType Leaf) {
        # Ignore blank lines and surrounding whitespace so a hand-edited list
        # still matches the file names.
        $sentPdfFileNames = @(
            Get-Content $sentListPath |
                ForEach-Object { $_.Trim() } |
                Where-Object { $_ }
        )
    }

    # Keep real files only (a directory named "x.pdf" cannot be opened as a
    # PDF) and re-check the extension, because the provider-level -Filter can
    # also return names whose extension merely starts with "pdf".
    $newPdfFiles = Get-ChildItem $folderPath -Filter "*.pdf" | Where-Object {
        (-not $_.PSIsContainer) -and
        ($_.Extension -eq ".pdf") -and
        ($sentPdfFileNames -notcontains $_.Name)
    }

    return $newPdfFiles
}
# Extracts the invoice fields from the text of a PDF page.
#
# Parameters:
#   $pdfContent - page text as a single string; $null is treated as empty text.
# Returns:
#   A hashtable with the keys Buyer, Company, CustomerCode, PaymentMethod,
#   InvoiceNumber and TotalAmount. Every value is a trimmed string; a field
#   whose pattern does not match is returned as an empty string.
function ParsePdfData($pdfContent) {
    # [regex]::Match throws on a null input, so normalise it to "" first.
    $text = [string]$pdfContent

    # Use the enum value explicitly: the string "m" is not the name of a
    # RegexOptions member. Multiline makes ^ and $ match at line boundaries.
    $multiline = [System.Text.RegularExpressions.RegexOptions]::Multiline

    # Buyer: the rest of the line after the "ПОКУПАТЕЛЬ:" label.
    $buyer = [regex]::Match($text, 'ПОКУПАТЕЛЬ: +(.+$)', $multiline).Groups[1].Value

    # Company: the first line fragment starting with "ООО" or "ЗАО".
    $company = [regex]::Match($text, '(ООО|ЗАО).+$', $multiline).Value

    # Customer code: the rest of the line after the "Код клиента:" label.
    $customerCode = [regex]::Match($text, 'Код клиента: (.*)', $multiline).Groups[1].Value

    # Payment method and invoice number are both taken from the line that
    # follows the "МЕТОД ОПЛАТЫ" line: the leading non-digit text is the
    # payment method (group 3), the first run of digits the invoice number
    # (group 4). The pattern is evaluated once and both groups are read from it.
    $paymentMatch = [regex]::Match($text, '((МЕТОД ОПЛАТЫ).+$\n^(\D*))(\d+)', $multiline)
    $paymentMethod = $paymentMatch.Groups[3].Value
    $invoiceNumber = $paymentMatch.Groups[4].Value

    # Total amount: the digits following the "ИТОГО" label.
    $totalAmount = [regex]::Match($text, 'ИТОГО +(\d+)', $multiline).Groups[1].Value

    $data = @{}
    $data.Add("Buyer", $buyer.Trim())
    $data.Add("Company", $company.Trim())
    $data.Add("CustomerCode", $customerCode.Trim())
    $data.Add("PaymentMethod", $paymentMethod.Trim())
    $data.Add("InvoiceNumber", $invoiceNumber.Trim())
    $data.Add("TotalAmount", $totalAmount.Trim())
    return $data
}
# Sends one PDF as an e-mail attachment, with the extracted invoice data in the
# subject and in an HTML body.
#
# Parameters:
#   $data           - hashtable with the keys Buyer, Company, CustomerCode,
#                     PaymentMethod, InvoiceNumber and TotalAmount.
#   $attachmentPath - path of the PDF file to attach.
# Reads the script-level variables $smtpServer, $credential, $sendTo and
# $username for the server, login, recipient and sender.
function SendEmail($data, $attachmentPath) {
    # The values come from PDF text and are placed into an HTML body, so escape
    # them; otherwise characters such as "<" or "&" in a company name would be
    # interpreted as markup.
    $encoded = @{}
    foreach ($key in "Buyer", "Company", "CustomerCode", "PaymentMethod", "InvoiceNumber", "TotalAmount") {
        $encoded[$key] = [System.Net.WebUtility]::HtmlEncode([string]$data[$key])
    }

    # The subject is plain text, so it uses the raw values.
    $subject = "$($data["Company"]), $($data["TotalAmount"]) euro"

    $body = "Ostja: $($encoded["Buyer"])<br>"
    $body += "Ettevõte: $($encoded["Company"])<br>"
    $body += "Kliendi kood: $($encoded["CustomerCode"])<br>"
    # Payment method and invoice number share one line in the message.
    $body += "Maksmise viis: $($encoded["PaymentMethod"])"
    $body += " Arve number: $($encoded["InvoiceNumber"])<br>"
    $body += "Kokku: $($encoded["TotalAmount"]) euro"

    $to = $sendTo
    $from = $username

    Send-MailMessage -SmtpServer $smtpServer -Credential $credential -To $to -From $from -Subject $subject -Body $body -BodyAsHtml -Attachments $attachmentPath -UseSsl -Encoding utf-8
}
# Main flow: make sure the tracking file exists, then read, mail and record
# every PDF in $folderPath that has not been sent before.
$sentListPath = "$folderPath\sent_pdfs.txt"
if (!(Test-Path $sentListPath)) {
    # Out-Null keeps the created file object out of the script's output.
    New-Item -Path $sentListPath -ItemType File | Out-Null
}

$newPdfs = CheckForNewPdfs($folderPath)
$sentMails = 0
$previousErrorPreference = $ErrorActionPreference

foreach ($pdfFile in $newPdfs) {
    $pdfFullPath = $pdfFile.FullName
    try {
        # Turn non-terminating errors (e.g. a failed Send-MailMessage inside
        # SendEmail) into exceptions, so a PDF that could not be mailed is not
        # recorded as sent below.
        $ErrorActionPreference = "Stop"

        # Read the text of the first page with iTextSharp; the reader is
        # closed even when the extraction throws.
        $reader = New-Object iTextSharp.text.pdf.PdfReader($pdfFullPath)
        try {
            $pdfContent = [iTextSharp.text.pdf.parser.PdfTextExtractor]::GetTextFromPage($reader, 1)
        } finally {
            $reader.Close()
        }

        $parsed = ParsePdfData($pdfContent)
        SendEmail -data $parsed -attachmentPath $pdfFullPath
    } catch {
        # Leave the file out of sent_pdfs.txt so it is retried on the next run.
        Write-Warning "Failed to process '$pdfFullPath': $($_.Exception.Message)"
        continue
    } finally {
        $ErrorActionPreference = $previousErrorPreference
    }

    # Record the file name so the PDF is skipped on later runs.
    $pdfFilename = Split-Path -Leaf $pdfFullPath
    $pdfFilename | Out-File -FilePath $sentListPath -Append
    $sentMails += 1
}

if (!$newPdfs) {
    Write-Output "New pdf documents did not found"
} else {
    Write-Output "Sent $sentMails mails"
}
@adamori
Copy link
Author

adamori commented Dec 25, 2022

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment