KrustyHack · March 5, 2024 10:26 · dodge107 · May 7, 2020 · KrustyHack · Jun 1, 2020
diff --git a/web_scraping_emails_website.sh b/web_scraping_emails_website.sh
 #!/bin/bash
 #
 # Mirror each domain listed in a file with wget, then extract every string
 # that looks like an e-mail address from the downloaded files.
 #
 # Usage:   script.sh DOMAINS.txt TMP_DIR
 #   DOMAINS.txt  text file with one domain per line
 #   TMP_DIR      directory that receives the mirrored sites and, per domain,
 #                a sorted, de-duplicated TMP_DIR/<domain>.emails.txt
 # Exit:    1 when an argument is missing or the domain list is unreadable.

 DOMAINS=$1
 TMP_DIR=$2

 if [[ -z "$DOMAINS" || -z "$TMP_DIR" ]]; then
    echo -e "Usage : script.sh DOMAINS.txt (with one domain per line) TMP_DIR"
    echo -e "Example : ./script.sh mydomains.txt /tmp"
    exit 1
 fi

 # Fail early instead of silently running both loops over nothing.
 if [[ ! -r "$DOMAINS" ]]; then
    echo "Cannot read domain list: $DOMAINS" >&2
    exit 1
 fi

 # Pass 1: download. "|| [[ -n ... ]]" keeps a final line that has no
 # trailing newline; -r stops read from interpreting backslashes.
 while read -r domain || [[ -n "$domain" ]]; do
    # Blank lines would otherwise invoke wget with an empty URL.
    [[ -n "$domain" ]] || continue

    echo -e "----- Start scraping $domain - Please wait -----"
    # The domain list must be ONE comma-separated argument: with a space
    # after the comma, "www.$domain" is treated as a second URL to fetch.
    wget \
      --quiet \
      --recursive \
      --page-requisites \
      --html-extension \
      --convert-links \
      --restrict-file-names=windows \
      --domains "$domain,www.$domain" \
      --no-parent \
      --directory-prefix="$TMP_DIR" \
      --progress=bar \
      --tries=3 \
      "$domain"
 done < "$DOMAINS"

 ### Split scrap and grep to prepare multiprocess script (coming soon)
 # Pass 2: extract addresses. No "wait" is needed after either loop because
 # nothing is started in the background.
 while read -r domain || [[ -n "$domain" ]]; do
    [[ -n "$domain" ]] || continue

    echo -e "----- Start email finding and filtering for $domain - Please wait -----"
    # -o prints only the matched text, -h drops file names; the glob is left
    # outside the quotes so it still expands while the path parts stay intact.
    grep -R -E -oh "\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b" "$TMP_DIR/$domain"/* \
      | sort \
      | uniq > "$TMP_DIR/$domain.emails.txt"
    echo -e "----- Emails saved to $TMP_DIR/$domain.emails.txt -----"
 done < "$DOMAINS"

 echo -e "----- Finished -----"
diff --git a/web_scraping_emails_whois.sh b/web_scraping_emails_whois.sh
 #!/bin/bash
 #
 # Query whois for each domain listed in a file and collect every string in
 # the responses that looks like an e-mail address.
 #
 # Usage:   script.sh DOMAINS.txt OUTPUT.txt
 #   DOMAINS.txt  text file with one domain per line
 #   OUTPUT.txt   receives the sorted, de-duplicated addresses
 # Exit:    1 when an argument is missing, the domain list is unreadable,
 #          or the scratch file cannot be created.

 DOMAINS=$1
 OUTPUT=$2

 if [[ -z "$DOMAINS" || -z "$OUTPUT" ]]; then
    echo -e "Usage : script.sh DOMAINS.txt (with one domain per line) OUTPUT.txt"
    echo -e "Example : ./script.sh mydomains.txt /tmp/output.txt"
    exit 1
 fi

 if [[ ! -r "$DOMAINS" ]]; then
    echo "Cannot read domain list: $DOMAINS" >&2
    exit 1
 fi

 # Use a private, unpredictable scratch file instead of a fixed name in /tmp:
 # a fixed name collides between concurrent runs, can be pre-created by
 # another user, and keeps stale matches from an earlier aborted run.
 scratch=$(mktemp) || exit 1
 # Remove the scratch file on every exit path, including interruption.
 trap 'rm -f -- "$scratch"' EXIT

 # "|| [[ -n ... ]]" keeps a final line that has no trailing newline;
 # -r stops read from interpreting backslashes.
 while read -r domain || [[ -n "$domain" ]]; do
    # Skip blank lines rather than querying whois with an empty argument.
    [[ -n "$domain" ]] || continue

    echo -e "Looking $domain"
    whois "$domain" \
      | grep -E -o "\b[a-zA-Z0-9.-]+@[a-zA-Z0-9.-]+\.[a-zA-Z0-9.-]+\b" >> "$scratch"
    # Pause between lookups (presumably to stay under whois rate limits).
    sleep 10
 done < "$DOMAINS"

 sort "$scratch" | uniq > "$OUTPUT"
	#!/bin/bash
	#
	# Mirror each domain listed in a file with wget, then extract every string
	# that looks like an e-mail address from the downloaded files.
	#
	# Usage:   script.sh DOMAINS.txt TMP_DIR
	#   DOMAINS.txt  text file with one domain per line
	#   TMP_DIR      directory that receives the mirrored sites and, per domain,
	#                a sorted, de-duplicated TMP_DIR/<domain>.emails.txt
	# Exit:    1 when an argument is missing or the domain list is unreadable.

	DOMAINS=$1
	TMP_DIR=$2

	# "||" must be unescaped: a backslash-escaped pipe is a syntax error in [[ ]].
	if [[ -z "$DOMAINS" || -z "$TMP_DIR" ]]; then
	  echo -e "Usage : script.sh DOMAINS.txt (with one domain per line) TMP_DIR"
	  echo -e "Example : ./script.sh mydomains.txt /tmp"
	  exit 1
	fi

	# Fail early instead of silently running both loops over nothing.
	if [[ ! -r "$DOMAINS" ]]; then
	  echo "Cannot read domain list: $DOMAINS" >&2
	  exit 1
	fi

	# Pass 1: download. "|| [[ -n ... ]]" keeps a final line that has no
	# trailing newline; -r stops read from interpreting backslashes.
	while read -r domain || [[ -n "$domain" ]]; do
	  # Blank lines would otherwise invoke wget with an empty URL.
	  [[ -n "$domain" ]] || continue

	  echo -e "----- Start scraping $domain - Please wait -----"
	  # The domain list must be ONE comma-separated argument: with a space
	  # after the comma, "www.$domain" is treated as a second URL to fetch.
	  wget \
	    --quiet \
	    --recursive \
	    --page-requisites \
	    --html-extension \
	    --convert-links \
	    --restrict-file-names=windows \
	    --domains "$domain,www.$domain" \
	    --no-parent \
	    --directory-prefix="$TMP_DIR" \
	    --progress=bar \
	    --tries=3 \
	    "$domain"
	done < "$DOMAINS"

	### Split scrap and grep to prepare multiprocess script (coming soon)
	# Pass 2: extract addresses. No "wait" is needed after either loop because
	# nothing is started in the background.
	while read -r domain || [[ -n "$domain" ]]; do
	  [[ -n "$domain" ]] || continue

	  echo -e "----- Start email finding and filtering for $domain - Please wait -----"
	  # Real pipes here: escaped ones would hand "|", "sort" and "uniq" to grep
	  # as file operands instead of building a pipeline.
	  grep -R -E -oh "\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b" "$TMP_DIR/$domain"/* \
	    | sort \
	    | uniq > "$TMP_DIR/$domain.emails.txt"
	  echo -e "----- Emails saved to $TMP_DIR/$domain.emails.txt -----"
	done < "$DOMAINS"

	echo -e "----- Finished -----"
	#!/bin/bash
	#
	# Query whois for each domain listed in a file and collect every string in
	# the responses that looks like an e-mail address.
	#
	# Usage:   script.sh DOMAINS.txt OUTPUT.txt
	#   DOMAINS.txt  text file with one domain per line
	#   OUTPUT.txt   receives the sorted, de-duplicated addresses
	# Exit:    1 when an argument is missing, the domain list is unreadable,
	#          or the scratch file cannot be created.

	DOMAINS=$1
	OUTPUT=$2

	# "||" must be unescaped: a backslash-escaped pipe is a syntax error in [[ ]].
	if [[ -z "$DOMAINS" || -z "$OUTPUT" ]]; then
	  echo -e "Usage : script.sh DOMAINS.txt (with one domain per line) OUTPUT.txt"
	  echo -e "Example : ./script.sh mydomains.txt /tmp/output.txt"
	  exit 1
	fi

	if [[ ! -r "$DOMAINS" ]]; then
	  echo "Cannot read domain list: $DOMAINS" >&2
	  exit 1
	fi

	# Use a private, unpredictable scratch file instead of a fixed name in /tmp:
	# a fixed name collides between concurrent runs, can be pre-created by
	# another user, and keeps stale matches from an earlier aborted run.
	scratch=$(mktemp) || exit 1
	# Remove the scratch file on every exit path, including interruption.
	trap 'rm -f -- "$scratch"' EXIT

	# "|| [[ -n ... ]]" keeps a final line that has no trailing newline;
	# -r stops read from interpreting backslashes.
	while read -r domain || [[ -n "$domain" ]]; do
	  # Skip blank lines rather than querying whois with an empty argument.
	  [[ -n "$domain" ]] || continue

	  echo -e "Looking $domain"
	  # Real pipe here: an escaped one would pass "|" and "grep" to whois as
	  # arguments instead of filtering its output.
	  whois "$domain" \
	    | grep -E -o "\b[a-zA-Z0-9.-]+@[a-zA-Z0-9.-]+\.[a-zA-Z0-9.-]+\b" >> "$scratch"
	  # Pause between lookups (presumably to stay under whois rate limits).
	  sleep 10
	done < "$DOMAINS"

	sort "$scratch" | uniq > "$OUTPUT"