Last active
March 5, 2024 10:26
-
-
Save KrustyHack/2f5898db0d2571959b48 to your computer and use it in GitHub Desktop.
Scrape a website with wget and extract the unique email addresses from it, or query each website's whois record to get its contact emails
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Mirror every domain listed in a file with wget, then extract the unique
# e-mail addresses found in the downloaded files.
#
# Usage:   script.sh DOMAINS.txt TMP_DIR
#   DOMAINS.txt  text file with one domain per line
#   TMP_DIR      directory that receives the mirrored sites and one
#                "<domain>.emails.txt" result file per domain
#
# Exit status: 1 when an argument is missing, the domain list is unreadable
# or TMP_DIR cannot be created; 0 otherwise.

DOMAINS=${1:-}
TMP_DIR=${2:-}

if [[ -z "$DOMAINS" || -z "$TMP_DIR" ]]; then
  echo -e "Usage : script.sh DOMAINS.txt (with one domain per line) TMP_DIR"
  echo -e "Example : ./script.sh mydomains.txt /tmp"
  exit 1
fi

if [[ ! -r "$DOMAINS" ]]; then
  printf 'Cannot read domain list: %s\n' "$DOMAINS" >&2
  exit 1
fi

# The result files are written directly into TMP_DIR, so it must exist even
# when wget downloaded nothing.
mkdir -p -- "$TMP_DIR" || exit 1

# Extended regex used to recognise an e-mail address in the mirrored files.
EMAIL_REGEX='\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b'

### Phase 1: mirror each domain below TMP_DIR.
# "|| [[ -n ... ]]" keeps the last line when the file has no trailing newline.
while read -r domain || [[ -n "$domain" ]]; do
  domain=${domain%$'\r'}            # tolerate CRLF line endings
  [[ -n "$domain" ]] || continue    # skip blank lines

  printf '%s\n' "----- Start scraping $domain - Please wait -----"
  # --domains takes ONE comma-separated list; a space after the comma would
  # turn "www.$domain" into a second start URL instead of an allowed domain.
  wget \
    --quiet \
    --recursive \
    --page-requisites \
    --html-extension \
    --convert-links \
    --restrict-file-names=windows \
    --domains "$domain,www.$domain" \
    --no-parent \
    --directory-prefix="$TMP_DIR" \
    --progress=bar \
    --tries=3 \
    "$domain"
done < "$DOMAINS"

### Phase 2: extract e-mail addresses. Kept as a separate pass over the list
### so that scraping and grepping can later be run by separate processes.
while read -r domain || [[ -n "$domain" ]]; do
  domain=${domain%$'\r'}
  [[ -n "$domain" ]] || continue

  site_dir="$TMP_DIR/$domain"
  emails_file="$TMP_DIR/$domain.emails.txt"

  printf '%s\n' "----- Start email finding and filtering for $domain - Please wait -----"
  if [[ -d "$site_dir" ]]; then
    # Search the directory itself rather than "$site_dir/*": no glob that can
    # fail to match, and no word splitting on unusual paths.
    grep -R -E -o -h -e "$EMAIL_REGEX" "$site_dir" | sort -u > "$emails_file"
  else
    printf 'No downloaded files for %s (expected %s)\n' "$domain" "$site_dir" >&2
    : > "$emails_file"              # still produce an (empty) result file
  fi
  printf '%s\n' "----- Emails saved to $emails_file -----"
done < "$DOMAINS"

echo -e "----- Finished -----"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Query the whois record of every domain listed in a file and write the
# unique e-mail addresses found in those records to an output file.
#
# Usage:   script.sh DOMAINS.txt OUTPUT.txt
#   DOMAINS.txt  text file with one domain per line
#   OUTPUT.txt   file that receives the sorted, de-duplicated addresses
#
# Exit status: 1 when an argument is missing, the domain list is unreadable
# or no scratch file can be created; otherwise the status of the final sort.

DOMAINS=${1:-}
OUTPUT=${2:-}

if [[ -z "$DOMAINS" || -z "$OUTPUT" ]]; then
  echo -e "Usage : script.sh DOMAINS.txt (with one domain per line) OUTPUT.txt"
  echo -e "Example : ./script.sh mydomains.txt /tmp/output.txt"
  exit 1
fi

if [[ ! -r "$DOMAINS" ]]; then
  printf 'Cannot read domain list: %s\n' "$DOMAINS" >&2
  exit 1
fi

# Extended regex for an e-mail address. The local part also accepts "_", "%"
# and "+": without them an address such as john_doe@example.com is dropped
# entirely, because \b cannot match between "_" and the following letter.
EMAIL_REGEX='\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z0-9.-]+\b'

# Use a private scratch file instead of a fixed name in /tmp: a fixed file
# that is only appended to would carry over results from an interrupted run,
# and a predictable name in a shared directory is unsafe.
scratch=$(mktemp "${TMPDIR:-/tmp}/whois-emails.XXXXXX") || {
  printf 'Cannot create temporary file\n' >&2
  exit 1
}
# Remove the scratch file on every exit path, including interruption.
trap 'rm -f -- "$scratch"' EXIT

# "|| [[ -n ... ]]" keeps the last line when the file has no trailing newline.
while read -r domain || [[ -n "$domain" ]]; do
  domain=${domain%$'\r'}            # tolerate CRLF line endings
  [[ -n "$domain" ]] || continue    # skip blank lines

  printf '%s\n' "Looking $domain"
  whois "$domain" | grep -E -o -e "$EMAIL_REGEX" >> "$scratch"
  # Pause between lookups; presumably to stay under whois server rate limits.
  sleep 10
done < "$DOMAINS"

sort -u -- "$scratch" > "$OUTPUT"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Ay @Kaos-Industries, did you try what @dodge107 pointed out? Does the script work properly now?