Created
May 19, 2025 16:03
-
-
Save colehocking/90d2e82c47e499bfff68c7b5af29cfd2 to your computer and use it in GitHub Desktop.
Why would you put IOCs in a PDF?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Extract a line-separated list of DNS and IPv4 IOCs from a PDF.
# Assumes the IOCs are "fanged" (e.g. 10[.]0[.]0[.]1) and de-fangs them.
# Requires the pdftotext application.
# -- Cole Hocking
# Path of the PDF to process (first command-line argument; empty if omitted,
# which main reports as a usage error).
PDF_FILE="${1:-}"
# Base name of the PDF, without any leading directory components
FILENAME="$(basename -- "${PDF_FILE}")"
# File extension
EXT="${FILENAME##*.}"
# File name without extension
FILE_HEAD="${FILENAME%.*}"
# Intermediate text file produced by the PDF conversion
TXT_FILE="${FILE_HEAD}.txt"
# Output file collecting the extracted IOCs (IPs and domains)
IOC_FILE="${FILE_HEAD}_ioc_list.txt"
# Convert the PDF to a plain-text file.
# Requires 'pdftotext' (install with: sudo apt install poppler-utils).
# Globals:  PDF_FILE (read), TXT_FILE (read; path of the file written)
# Outputs:  error message on stderr when pdftotext is not installed
# Returns:  1 when pdftotext is missing, otherwise pdftotext's exit status
convert_pdf() {
  if ! command -v pdftotext >/dev/null 2>&1; then
    echo "pdftotext not found (install with: sudo apt install poppler-utils)" >&2
    return 1
  fi

  # Name the output explicitly: with no second argument pdftotext writes the
  # .txt next to the PDF, which is not where TXT_FILE points when the PDF
  # lives outside the current directory.
  pdftotext "${PDF_FILE}" "${TXT_FILE}"
}
# Extract fanged IPv4 addresses (e.g. 10[.]0[.]0[.]1) from TXT_FILE,
# de-fang them, and append them one per line to IOC_FILE.
# Globals:  TXT_FILE (read), IOC_FILE (appended to)
# Outputs:  error message on stderr when TXT_FILE is missing
# Returns:  exits the script with status 1 when TXT_FILE is missing
extract_ips() {
  # Check to ensure the text file was created
  if [[ ! -f "${TXT_FILE}" ]]; then
    echo "${TXT_FILE} -- File not found. IP" >&2
    exit 1
  fi

  # -o emits only the matched address instead of the whole line, so
  # surrounding prose and form-feed page-break characters never reach the
  # IOC list; sed then strips the [ ] "fangs".
  grep -oE -- '[0-9]{1,3}\[\.\][0-9]{1,3}\[\.\][0-9]{1,3}\[\.\][0-9]{1,3}' \
    "${TXT_FILE}" \
    | sed 's/[][]//g' >> "${IOC_FILE}"
}
# Extract URLs from TXT_FILE, drop the scheme, de-fang them, and append the
# unique results (host plus any path) one per line to IOC_FILE.
# Recognises http/https as well as the fanged hxxp/hxxps and "[:]" forms.
# Globals:  TXT_FILE (read), IOC_FILE (appended to)
# Outputs:  error message on stderr when TXT_FILE is missing
# Returns:  exits the script with status 1 when TXT_FILE is missing
extract_domains() {
  if [[ ! -f "${TXT_FILE}" ]]; then
    echo "${TXT_FILE} -- File not found." >&2
    exit 1
  fi

  # grep -o keeps only the URL token, not the rest of the line. The scheme is
  # stripped and the brackets removed BEFORE de-duplicating, so http:// and
  # https:// variants of the same host collapse into one entry.
  grep -oE -- 'h(tt|xx)ps?(:|\[:\])//[^[:space:]]+' "${TXT_FILE}" \
    | sed -e 's|^[^/]*//||' -e 's/[][]//g' \
    | LC_ALL=C sort -u >> "${IOC_FILE}"
}
# Entry point: convert the PDF named by PDF_FILE to text, extract IP and
# domain IOCs into IOC_FILE, then remove the intermediate text file.
# Globals:  PDF_FILE, TXT_FILE, IOC_FILE (read)
# Outputs:  success message on stdout; usage and errors on stderr
# Returns:  exits 1 when PDF_FILE is not an existing file or conversion fails
main() {
  # check that the pdf file is supplied
  if [[ ! -f "${PDF_FILE}" ]]; then
    echo "Usage: ./extract_iocs.sh <file>.pdf" >&2
    exit 1
  fi

  # Stop if conversion fails so a stale text file is never mined for IOCs.
  if ! convert_pdf; then
    echo "${PDF_FILE} -- PDF conversion failed." >&2
    exit 1
  fi

  extract_ips
  extract_domains
  echo "IOC List Created: ${IOC_FILE}"
  rm -- "${TXT_FILE}"
}
# Run the script; the PDF path is taken from the first command-line argument.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment