Created
April 30, 2021 05:32
-
-
Save lordmatt/c010383445459af7420ab07be76004c6 to your computer and use it in GitHub Desktop.
A script to feed junk data to badly behaved data harvesters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/*
This script is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
any later version.
This script is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this script. If not, see https://www.gnu.org/licenses/gpl-2.0.html.
*/
/**
 * This script is designed to generate useless data for badly behaved bots & data
 * harvesters. They can slurp up this bad data all day long and will probably be
 * left with an unusable data set.
 *
 * Just upload somewhere and link to it with a rel="nofollow" so only bad bots
 * will find it.
 *
 * I've left some notes if you want to customise this for your site.
 *
 * You can also use this with WordPress. Add this file to your theme and then
 * create a page with the slug name-list (title "Name List" should work).
 * If used as a WordPress page template, no content will be shown other than
 * what is in this script.
 *
 * Note: This is far from an example of highly optimised code. It works but that
 * is all it does. There are ways to make this better. This is not a good
 * example code to learn from.
 *
 */
# Outward (area) letters for the fake postcodes. There are more you could add.
# To add more, add one per line.
$post_code_bits = "AB
AL
B
BA
BR
BS
BT
CA
CB
CF
CT
CV
CW
DA
DH
EX
FK
G
GL
GU
HA
HD
HG
HP
HR
HS
HU
HX
IP
KT
KW
KY
L";# last line must end ";
# last names - add more here, one per line
$surnames= "Smith
Jones
Taylor
Brown
Williams
Wilson
Johnson
Davies
Patel";# last line must end ";
#first names - to add more add one per line
$forenames="Olivia
Emily
Lily
Ella
Mia
Oliver
George
Harry
Jack
Noah
Charlie
Emma
Ava
Isabella
Sophia
Taylor
Charlotte
Evelyn
James
John
Robert
Michael
William
David
Richard"; # last line must end ";
# common street names - add more, one per line
$streets = "High Street
Station Road
Main Street
Park Road
Church Road
Church Street
London Road
Victoria Road
Green Lane
Manor Road
Church Lane
Park Avenue"; # last line must end ";
#email domains
#I used domains that had spammed me recently
#but popular webmail domains could theoretically go here
#as could the domains of ethically questionable businesses
#or a domain you own if you really like spam
#whatever domains you think are reasonable to add, basically.
#add one per line
$emails="email.ru
nospam.com
fastmail.ru
hr.dailymail.co.uk
mymail.ru
email.pl
fastmail.pl
mymail.pl
yagoo.website
mfsa.info
posta-esportivas.info
prawneserwisy.pl
forcemix.online
hitmail.com
hitmail.co.uk
hitmail.fun
yaho.com
hitmail.co.uk
hitmail.site
pushmail.fun
gleella.buzz
webmailpro.pw
webmail2.site
webproton.site
webmailaccount.site
webmailm1.site"; # last line must end ";
#stop editing here unless you want to play with the formatting
# Turn each one-per-line list into a clean, 0-indexed array:
#  - trim() strips stray spaces and the "\r" left behind when this file is
#    saved with Windows line endings;
#  - array_filter(..., 'strlen') drops blank lines so an empty entry can never
#    be picked;
#  - array_values() re-packs the keys, because the code that picks entries
#    indexes from 0 to count()-1.
$surname_ar = array_values(array_filter(array_map('trim', explode("\n", $surnames)), 'strlen'));
$forenames_ar = array_values(array_filter(array_map('trim', explode("\n", $forenames)), 'strlen'));
$streets_ar = array_values(array_filter(array_map('trim', explode("\n", $streets)), 'strlen'));
$emails_ar = array_values(array_filter(array_map('trim', explode("\n", $emails)), 'strlen'));
$pc_ar = array_values(array_filter(array_map('trim', explode("\n", $post_code_bits)), 'strlen'));
# functions - leave alone unless you know what you are doing | |
/**
 * Build an optional middle-initial fragment for a generated name.
 *
 * About one call in six returns a single space (no initial). Otherwise the
 * result is one initial followed by ". " (for example "J. "), and about one
 * in six of those gains a second initial ("J. K. ").
 *
 * @return string ' ', 'X. ' or 'X. Y. '
 */
function mb_maybe_mi(){
    # pool of initials; I, O, Q, U, W, X, Y and Z are not included
    $mid_in = array('A','B','C','D','E','F','G','H','J','K','L','M','N','P','R','S','T','V');
    if(mt_rand(0,5) === 0){
        # no middle initial this time
        return ' ';
    }
    $last = count($mid_in) - 1;
    $mi = $mid_in[mt_rand(0,$last)] . '. ';
    if(mt_rand(0,5) === 0){
        # occasionally add a second initial
        $mi .= $mid_in[mt_rand(0,$last)] . '. ';
    }
    return $mi;
}
/**
 * Build the local part (the text before the "@") of a fake email address.
 *
 * One of four shapes is chosen at random:
 *   - surname followed by a number from 2 to 99
 *   - forename, joiner, surname
 *   - forename followed by a number from 2 to 99
 *   - surname, joiner, forename
 * The joiner is '-', '.', '_', '+' or nothing; '-', '.' and '_' appear twice
 * in the pool, so each is twice as likely as '+' or no joiner.
 *
 * The names are used exactly as given; no case folding or escaping is done here.
 *
 * @param string $fn forename
 * @param string $ln surname
 * @return string the local part, without "@" or domain
 */
function fake_email($fn,$ln){
    $joins = array('-','.','_','-','.','_','+','');
    # the joiner is drawn first, even for the shapes that end up not using it
    $j = $joins[mt_rand(0,count($joins)-1)];
    if(mt_rand(0,5) === 0){
        return $ln . mt_rand(2,99);
    }
    if(mt_rand(0,2) === 0){
        return $fn . $j . $ln;
    }
    if(mt_rand(0,1) === 0){
        return $fn . mt_rand(2,99);
    }
    return $ln . $j . $fn;
}
/**
 * Build a random UK-style postcode such as "AB12 3CD".
 *
 * The result is: one entry picked from $pc_ar (trimmed), a number from 1 to
 * 19, a space, a digit from 1 to 9, and two letters from a fixed pool.
 *
 * @param array $pc_ar 0-indexed list of postcode area letters; must not be empty
 * @return string the generated postcode
 */
function post_coder_mb($pc_ar){
    # 'M' and 'N' are separate entries; the original had 'M'.'N', which
    # concatenated them into a single "MN" element and so sometimes produced
    # three or four trailing letters instead of two.
    $letter = array('A','B','C','D','E','F','G','H','J','K','L','M','N','P','R','S','T','V','W','X','Z');
    $last = count($letter) - 1;
    $postcode = trim($pc_ar[mt_rand(0,count($pc_ar)-1)]);
    $postcode .= mt_rand(1,19);
    $postcode .= ' ';
    $postcode .= mt_rand(1,9);
    $postcode .= $letter[mt_rand(0,$last)];
    $postcode .= $letter[mt_rand(0,$last)];
    return $postcode;
}
# the loop: print 100 fake people as an hCard-style list
?>
<ul>
<?php
for ($i = 1; $i <= 100; $i++){
    # pick the raw parts for one fake person
    $new_fn = $forenames_ar[mt_rand(0,count($forenames_ar)-1)];
    $new_ln = $surname_ar[mt_rand(0,count($surname_ar)-1)];
    $new_emdom = $emails_ar[mt_rand(0,count($emails_ar)-1)];
    # Assemble the fields. Each one is HTML-escaped because it is echoed into
    # both element text and a double-quoted attribute below, and the editable
    # lists may contain characters such as & or quotes.
    $_new_name = htmlspecialchars($new_fn.' '.mb_maybe_mi().$new_ln, ENT_QUOTES, 'UTF-8');
    $_new_adr = htmlspecialchars(mt_rand(1,120) . ' ' . $streets_ar[mt_rand(0,count($streets_ar)-1)], ENT_QUOTES, 'UTF-8');
    $_new_email = htmlspecialchars(strtolower(fake_email($new_fn,$new_ln).'@'.$new_emdom), ENT_QUOTES, 'UTF-8');
    $_new_postcode = htmlspecialchars(post_coder_mb($pc_ar), ENT_QUOTES, 'UTF-8');
    # The formatting. Edit as you see fit.
?>
<li>
<span class="vcard">
<strong><span class="fn"><?php echo $_new_name; ?></span></strong> of
<em><span class="adr">
<span class="street-address"><?php echo $_new_adr; ?></span>,
<span class="postal-code"><?php echo $_new_postcode; ?></span>,
<span class="country-name">UK</span>.
</span></em>
<span>Email:
<a class="email" href="mailto:<?php echo $_new_email; ?>" rel='nofollow'><?php echo $_new_email; ?></a>
</span>
</span>
</li>
<?php
}
?></ul>
<?php
# generate unlimited links for badly behaved bots to slurp up lots and lots of
# utterly useless data.
# The links have no text, and every request gets fresh random query values.
# 9999999999 does not fit in a 32-bit integer; capping at PHP_INT_MAX keeps the
# bound an int there and leaves it at 9999999999 on 64-bit builds.
$bait_max = min(9999999999, PHP_INT_MAX);
for ($i = 1; $i <= 10; $i++){
?>
<a rel='nofollow' class='do-not-use' href="?list=<?php echo $i; ?>-i-<?php echo mt_rand(999,$bait_max); ?>&amp;key=<?php echo mt_rand(9,$bait_max); ?>"></a>
<?php
}
# other content here:
?>
<p class='please do not use this data'></p>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment