Skip to content

Instantly share code, notes, and snippets.

@colehocking
Created May 19, 2025 16:03
Show Gist options
  • Save colehocking/90d2e82c47e499bfff68c7b5af29cfd2 to your computer and use it in GitHub Desktop.
Save colehocking/90d2e82c47e499bfff68c7b5af29cfd2 to your computer and use it in GitHub Desktop.
Why Would you put IOCs in a PDF?
#!/bin/bash
# Extract a line-separated list of DNS and IPv4 IOCs from a pdf
# Assumes the IOCs are defanged (e.g. 10[.]0[.]0[.]1) and strips the brackets to restore them
# requires pdftotext application
# -- Cole Hocking
# Path of the PDF to scan, taken from the first command-line argument
PDF_FILE=$1
# Last path component of the PDF (any leading directories dropped)
FILENAME=$(basename -- "$PDF_FILE")
# Base name with its final ".suffix" removed
FILE_HEAD=${FILENAME%.*}
# Whatever follows the last dot of the base name
EXT=${FILENAME##*.}
# Name of the intermediate text dump of the PDF
TXT_FILE=$FILE_HEAD.txt
# Name of the output list that the extract_* functions append to
IOC_FILE=${FILE_HEAD}_ioc_list.txt
#######################################
# Convert the PDF to plain text.
# Requires 'pdftotext' (install with: sudo apt install poppler-utils).
# Globals:   PDF_FILE (read) - path of the PDF to convert
#            TXT_FILE (read) - path the text output is written to
# Returns:   pdftotext's exit status, or 127 when pdftotext is not installed
#######################################
convert_pdf() {
  if ! command -v pdftotext >/dev/null 2>&1; then
    echo "pdftotext not found (install with: sudo apt install poppler-utils)" >&2
    return 127
  fi
  # Name the output file explicitly: with a single argument pdftotext writes
  # the .txt next to the PDF, which is not where TXT_FILE points whenever the
  # PDF lives outside the current directory.
  pdftotext "${PDF_FILE}" "${TXT_FILE}"
}
#######################################
# Append every IPv4 address written with bracketed dots (1[.]2[.]3[.]4)
# found in the text dump to the IOC list, one address per line and with
# the brackets removed.
# Globals:   TXT_FILE (read)     - text dump to scan
#            IOC_FILE (appended) - output list
# Outputs:   error message on stderr when the text dump is missing
# Returns:   exits the script with status 1 when TXT_FILE does not exist
#######################################
extract_ips() {
  # Check to ensure the text file was created
  if [[ ! -f "${TXT_FILE}" ]]; then
    echo "${TXT_FILE} -- File not found. IP" >&2
    exit 1
  fi
  # -o prints only the matched address instead of the whole line, so
  # surrounding prose and page-break form feeds never reach the list;
  # sed then strips the brackets.
  grep -Eo '[0-9]{1,3}\[\.\][0-9]{1,3}\[\.\][0-9]{1,3}\[\.\][0-9]{1,3}' "${TXT_FILE}" \
    | sed 's/[][]//g' >> "${IOC_FILE}"
}
#######################################
# Append the host/path part of every line containing an http:// or https://
# URL to the IOC list: the text between the first and second "://" of the
# line, with square brackets removed and duplicates dropped.
# Globals:   TXT_FILE (read)     - text dump to scan
#            IOC_FILE (appended) - output list
# Outputs:   error message on stderr when the text dump is missing
# Returns:   exits the script with status 1 when TXT_FILE does not exist
#######################################
extract_domains() {
  if [[ ! -f "${TXT_FILE}" ]]; then
    echo "${TXT_FILE} -- File not found." >&2
    exit 1
  fi
  # grep -E replaces the obsolescent egrep. Brackets are stripped the same
  # way extract_ips does it, and de-duplication runs last so the http and
  # https forms of one host collapse into a single entry.
  grep -E 'http://|https://' "${TXT_FILE}" \
    | awk -F "://" '{print $2}' \
    | sed 's/[][]//g' \
    | LC_ALL=C sort -u >> "${IOC_FILE}"
}
#######################################
# Run the whole extraction: convert the PDF to text, collect IPs and
# domains into the IOC list, then remove the intermediate text file.
# Globals:   PDF_FILE, TXT_FILE, IOC_FILE (read)
# Outputs:   confirmation on stdout; usage and errors on stderr
# Returns:   exits with status 1 when PDF_FILE is not an existing regular
#            file or the conversion fails
#######################################
main() {
  # check that the pdf file is supplied
  if [[ ! -f "${PDF_FILE}" ]]; then
    echo "Usage: ./extract_iocs.sh <file>.pdf" >&2
    exit 1
  fi
  # Stop on a failed conversion rather than parsing a stale text file
  # that happens to carry the expected name.
  if ! convert_pdf; then
    echo "${PDF_FILE} -- Could not convert to text." >&2
    exit 1
  fi
  # The extractors append, so start from an empty list; otherwise running
  # the script again on the same PDF duplicates every entry.
  : > "${IOC_FILE}"
  extract_ips
  extract_domains
  echo "IOC List Created: ${IOC_FILE}"
  rm -- "${TXT_FILE}"
}
# Entry point: run the extraction for the PDF path given as the first argument.
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment