Last active
December 15, 2015 00:58
-
-
Save alexnederlof/5176192 to your computer and use it in GitHub Desktop.
Make list of random websites
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# This script will create a file with random URLs and a file with random
# domain names. It makes use of the site http://www.randomwebsite.com
#
# You should have curl and grep installed to run this.

# Fail fast: abort the run if any top-level command errors out.
set -e

# Output files: raw redirect URLs, and the de-duplicated host names.
readonly SITES_FILE=sites.txt
readonly SITES_NORMALIZED=sites_normalized.txt
# Collect 1000 random site URLs. randomwebsite.com answers with an HTTP
# redirect; curl's '%{redirect_url}' write-out gives us the Location target
# directly, so we no longer dump headers to a temp file in the cwd, and we
# avoid the trailing CR that grep/cut used to carry over from the CRLF
# header terminator.
for _ in {1..1000}; do
  url=$(curl -s -o /dev/null -w '%{redirect_url}' http://www.randomwebsite.com/cgi-bin/random.pl)
  # Skip responses that were not redirects (empty redirect_url).
  if [ -n "$url" ]; then
    printf '%s\n' "$url" >> "$SITES_FILE"
  fi
done
# Make sure the output file exists so the duplicate check below has a
# file to grep. Quoted to survive paths with spaces (SC2086).
if [ ! -f "$SITES_NORMALIZED" ]; then
  touch "$SITES_NORMALIZED"
fi
# Normalize: reduce each collected URL to its bare host name and append
# unique hosts to $SITES_NORMALIZED.

# Print the host portion of a URL: strip a leading http:// or https://
# scheme, then drop everything from the first slash onward.
# (Parameter expansion replaces the original per-line `cut` fork, and
# unlike the old code a URL with no scheme falls through unchanged instead
# of reusing the previous iteration's stale $noHttp value.)
normalize_host() {
  local url=$1
  url=${url#http://}
  url=${url#https://}
  printf '%s\n' "${url%%/*}"
}

# Read line-by-line (the old `for site in $(cat sites.txt)` word-split on
# whitespace) and read from $SITES_FILE for consistency with the fetch
# step instead of a hard-coded file name.
while IFS= read -r site || [ -n "$site" ]; do
  host=$(normalize_host "$site")
  [ -n "$host" ] || continue
  if grep -Fxq -- "$host" "$SITES_NORMALIZED"; then
    echo "Already added $host"
  else
    printf '%s\n' "$host" >> "$SITES_NORMALIZED"
  fi
done < "$SITES_FILE"
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.