Skip to content

Instantly share code, notes, and snippets.

@jtmoon79
Created September 13, 2023 18:43
Show Gist options
  • Save jtmoon79/3689ec39fbd804f4683c226d5ee74a67 to your computer and use it in GitHub Desktop.
Save jtmoon79/3689ec39fbd804f4683c226d5ee74a67 to your computer and use it in GitHub Desktop.
Rename PDF files downloaded from paperlessemployee.com
#!/usr/bin/env bash
#
# rename pdf files from paperlessemployee.com
#
# Abort on any failing command, on use of an unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail

# Exactly one or two arguments are accepted: the PDF file and an optional
# agency name. Any other count is a usage error, so the usage text goes to
# stderr and the exit status is non-zero, like every other error in this script.
if (( ${#} < 1 || ${#} > 2 )); then
    echo "usage:
$(basename "${0}") SelectedPayStatements.pdf [Agency Name]
about:
rename pay statements PDF files from www.paperlessemployee.com from default name 'SelectedPayStatements.pdf'
to 'My Agency Pay Statement YYYY-MM-DD to YYYY-MM-DD (X USD).pdf'
e.g.
'Super Talent Pay Statement 2023-01-21 to 2023-01-28 (total 1,234.00 USD) (final 1,000.00 USD).pdf'
Presumes the pay statement is only one pay statement and not multiple pay statements
within one PDF file.
" >&2
    exit 1
fi

# $1 - path of the PDF file to rename
# $2 - optional agency name that is placed at the front of the new file name
pdf_file=${1-}
agency_name=${2-}

# the file must be readable by the current user
if [[ ! -r "${pdf_file}" ]]; then
    echo "ERROR Cannot read file '${pdf_file}'" >&2
    exit 1
fi

# the file name must end with the literal, lower-case suffix '.pdf'
if [[ "${pdf_file}" != *.pdf ]]; then
    echo "ERROR must pass a file ending with .pdf, given '${pdf_file}'" >&2
    exit 1
fi
function remove_leading_0 () {
    # Print the string passed as $1 with every leading '0' removed.
    # Nothing else is changed and no trailing newline is added; a string made
    # only of zeros becomes the empty string.
    #
    # printf is used instead of `echo -n` because echo treats values such as
    # '-n' or '-e' as options and would print nothing for them.
    printf '%s' "${1-}" | sed -E -e 's/^0+//'
}
function is_positive_number () {
    # Succeed (status 0) when $1 is a decimal integer greater than 0.
    # Leading zeros are allowed, so '08' counts as positive.
    # Fail (status 1) for an empty string, for zero, and for anything that is
    # not made purely of the digits 0-9.
    #
    # The value is checked as text and never handed to shell arithmetic:
    # arithmetic would evaluate expressions found in the input (the text comes
    # from a PDF), would read '08' as an invalid octal constant, and would
    # overflow on very long digit strings.
    local candidate=${1-}
    [[ "${candidate}" =~ ^[0-9]+$ ]] || return 1
    # an all-digit string is greater than zero exactly when it has a non-zero digit
    [[ "${candidate}" =~ [1-9] ]]
}
function trim_whitespace () {
    # Filter: copy stdin to stdout with the whitespace at the start and at the
    # end of every line removed. Whitespace inside a line is left alone.
    #
    # The two substitutions are anchored separately so that a line padded on
    # only one side is still trimmed, and so that a match can never begin in
    # the middle of a line.
    sed -E -e 's/^[[:space:]]+//' -e 's/[[:space:]]+$//'
}
remove_blank_lines() {
    # Filter: pass stdin through to stdout, leaving out every line that is
    # empty or consists only of whitespace characters.
    # Automatic printing is switched off; a line is printed only when it does
    # NOT match the "whitespace only" pattern.
    sed -n -E -e '/^[[:space:]]*$/!p'
}
print_line() {
    # Filter: write to stdout only the line of stdin that sed addresses with
    # $1 (callers in this script pass a 1-based line number).
    # Nothing is printed when stdin has fewer lines than that.
    local -r wanted_line=${1}
    sed -n -e "${wanted_line}p"
}
# pdftotext does the PDF-to-text conversion; stop early, with a hint about
# where to get it, when it cannot be found.
# `command -v` is a shell builtin, so this check does not itself depend on an
# external program such as `which` being installed.
if ! command -v pdftotext &>/dev/null; then
    echo "ERROR cannot find 'pdftotext' in the PATH" >&2
    echo " pdftotext is part of apt package 'poppler-utils'" >&2
    exit 1
fi

# Convert the PDF to plain text on stdout ('-'), then drop blank lines so that
# the line offsets used by the later lookups count only lines with content.
text=$(pdftotext "${pdf_file}" -)
text=$(printf '%s' "${text}" | remove_blank_lines)
# Start building the new file name. The agency name is optional: when one was
# given it is followed by a single space, otherwise the name starts directly
# with 'Pay Statement'.
if [[ -n "${agency_name}" ]]; then
    agency_name_append=' '
else
    agency_name_append=''
fi
new_name="${agency_name}${agency_name_append}Pay Statement"
# find the unique string before the date text
# Example text around the week dates:
#
#   Pay Period Begin:
#   07/30/2023
#   Pay Period End:
#   08/05/2023
#
# grep keeps the matching line plus the three lines after it, so in the
# layout above the start date is line 2 and the end date is line 4.
if ! text_dates=$(echo "${text}" | grep -A3 -Ee '^Pay Period Begin:'); then
    echo "ERROR failed to find unique string 'Pay Period Begin' in '${pdf_file}'" >&2
    exit 1
fi

# Show what was found. The PDF text is passed to printf as an argument rather
# than through `echo -e`, so backslashes in it are printed literally.
printf 'text_dates:\n%s\n' "${text_dates}"
echo

text_date_start=$(printf '%s' "${text_dates}" | print_line 2 | trim_whitespace)
text_date_end=$(printf '%s' "${text_dates}" | print_line 4 | trim_whitespace)
echo "text_date_start: '${text_date_start}'"
echo "text_date_end: '${text_date_end}'"
#
# date text 07/30/2023
# index     0123456789
#
# re-arrange from 'MM/DD/YYYY' to 'YYYY-MM-DD'
#
# Each part is cut out by fixed position and must be a number greater than 0.
# The parts are passed quoted: an empty part (for example when the date line
# was missing) must reach is_positive_number as an empty argument so that the
# error message below is printed, instead of the argument disappearing.
#
ds_y=${text_date_start:6:4} # Date Start Year
ds_m=${text_date_start:0:2} # Date Start Month
ds_d=${text_date_start:3:2} # Date Start Day
is_positive_number "${ds_y}" || { echo "ERROR: bad year '${ds_y}' in Date Start string '${text_date_start}'" >&2; exit 1; }
is_positive_number "${ds_m}" || { echo "ERROR: bad month '${ds_m}' in Date Start string '${text_date_start}'" >&2; exit 1; }
is_positive_number "${ds_d}" || { echo "ERROR: bad day '${ds_d}' in Date Start string '${text_date_start}'" >&2; exit 1; }

de_y=${text_date_end:6:4} # Date End Year
de_m=${text_date_end:0:2} # Date End Month
de_d=${text_date_end:3:2} # Date End Day
is_positive_number "${de_y}" || { echo "ERROR: bad year '${de_y}' in Date End string '${text_date_end}'" >&2; exit 1; }
is_positive_number "${de_m}" || { echo "ERROR: bad month '${de_m}' in Date End string '${text_date_end}'" >&2; exit 1; }
is_positive_number "${de_d}" || { echo "ERROR: bad day '${de_d}' in Date End string '${text_date_end}'" >&2; exit 1; }

# append the pay period, e.g. ' 2023-07-30 to 2023-08-05'
new_name+=" ${ds_y}-${ds_m}-${ds_d} to ${de_y}-${de_m}-${de_d}"
# find the unique strings that describe pay
# Example of such text:
#
#   TOTAL
#   TAXES
#   TOTAL
#   DEDUCTIONS
#   NET PAY
#   Current:
#   1,234.00
#   123.45
#   12.34
#   1,234.12
#   1,234.12
#   1,234.12
#   YTD:
#   9,999.99
#   999.99
#   99.99
#   9,900.00
#   9,000.00
#   9,000.00
#   0.00
#   111.11
#   222.22
#   3,333.33
#   0.00
#   1,444.44
#   333.33
#   3,210.00
#   LEAVE BALANCE
#
function is_positive_pay () {
    # Succeed (status 0) when the whole-dollar part of the money string $1,
    # e.g. '1,234.00', is a decimal integer greater than 0; fail otherwise
    # (empty string, non-numeric text, or zero dollars).
    #
    # The amount is checked as text and never handed to shell arithmetic:
    # there the ',' is the comma operator, so '1,000' would evaluate to 0, and
    # a group such as '008' is an invalid octal constant.
    local dollars=${1-}
    dollars=${dollars%.??}    # drop the trailing '.NN' (sub-dollar amount)
    dollars=${dollars//,/}    # drop the thousands separators
    [[ "${dollars}" =~ ^[0-9]+$ ]] || return 1
    # an all-digit string is greater than zero exactly when it has a non-zero digit
    [[ "${dollars}" =~ [1-9] ]]
}

# Keep the 'NET PAY' line plus the 12 lines after it. A missing amount is not
# fatal: a message is printed and that part is left out of the file name.
if text_pay_raw=$(echo "${text}" | grep -A12 -Ee '^NET PAY'); then
    # pay total current period
    # (line 3 of the match: in the example above, the first amount after 'Current:')
    if text_pay=$(printf '%s' "${text_pay_raw}" | print_line 3 | trim_whitespace); then
        if is_positive_pay "${text_pay}"; then
            echo "text_pay (total): '${text_pay}'"
            new_name+=" (total ${text_pay} USD)"
        else
            echo "Failed to convert text_pay total" >&2
        fi
    else
        echo "Failed to extract text_pay total from text_pay_raw" >&2
    fi
    # pay final current period (after taxes amount)
    # (the line directly before 'YTD:')
    if text_pay=$(printf '%s' "${text_pay_raw}" | grep -B1 -Ee '^YTD:' | print_line 1 | trim_whitespace); then
        if is_positive_pay "${text_pay}"; then
            echo "text_pay (final): '${text_pay}'"
            new_name+=" (final ${text_pay} USD)"
        else
            echo "Failed to convert text_pay final" >&2
        fi
    else
        echo "Failed to extract text_pay final from text_pay_raw" >&2
    fi
else
    echo "Failed to find NET PAY" >&2
fi
# find the unique string that describe hours
# Example of such text:
#
#   Current Hours
#   Current Amt
#   YTD Hours
#   YTD Amt
#   40.00
#   1,234.00
#   123.00
#   99,999.00
#   40.00
#   123.00
#   99,999.00
#   EARNINGS
#
# The 'Current Hours' line and the five lines after it are kept; line 5 of
# that excerpt is the hours figure in the layout above. Nothing is reported
# when 'Current Hours' is not present at all.
if text_hours_raw=$(echo "${text}" | grep -A5 -Ee '^Current Hours'); then
    text_hours=$(echo "${text_hours_raw}" | print_line 5 | trim_whitespace)
    if [[ -z "${text_hours}" ]]; then
        echo "Failed to extract text_hours from text_hours_raw" >&2
    else
        echo "text_hours : '${text_hours}'"
        new_name+=" (${text_hours} hours)"
    fi
fi
# finish the new name and show it
new_name+=".pdf"
echo "new_name: '${new_name}'"
echo

# The renamed file stays in the directory of the original file.
pdf_path=$(dirname -- "${pdf_file}")
new_path="${pdf_path}/${new_name}"

# Two statements can produce the same generated name; refuse to replace a file
# that already exists rather than silently losing it.
if [[ -e "${new_path}" ]]; then
    echo "ERROR refusing to overwrite existing file '${new_path}'" >&2
    exit 1
fi

# trace the final command so the exact rename is visible to the user
set -x
# '--' ends option parsing so a file name is never read as an option
mv -v -- "${pdf_file}" "${new_path}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment