Created
July 9, 2018 04:08
-
-
Save McJoppy/2b3a414177ba17497c5bc7cdc133dc9f to your computer and use it in GitHub Desktop.
Retrieve list of Bot user agents
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Original script https://gist.github.com/mateusz/990b37e4f37db299de4e
// Changed to replace whitespace with regex \s
//
// Prints an nginx `map` block on stdout that maps $http_user_agent to
// $limit_bots: the default value is "" and every group of crawler patterns
// becomes one `~*( ... )` regex entry whose value is $http_user_agent.
// The patterns are downloaded from the crawler-user-agents JSON list.
//
// On a download or decode failure a message is written to stderr and the
// script exits with status 1 without printing anything to stdout, so a
// redirected output file never ends up holding a truncated or empty map.

$sourceUrl = "https://raw.githubusercontent.com/monperrus/crawler-user-agents/master/crawler-user-agents.json";

// Number of patterns joined into a single map entry (the original flushed
// once the buffer held more than 8 patterns, i.e. 9 per line).
$patternsPerLine = 9;

// Fetch and decode first: nothing is echoed until the data is known to be usable.
$json = file_get_contents($sourceUrl);
if ($json === false) {
    file_put_contents('php://stderr', "Failed to download crawler list from " . $sourceUrl . "\n");
    exit(1);
}

$data = json_decode($json, true);
if (!is_array($data)) {
    file_put_contents(
        'php://stderr',
        "Crawler list did not decode to a JSON array (" . json_last_error_msg() . ")\n"
    );
    exit(1);
}

$patterns = [];
foreach ($data as $item) {
    // Skip entries without a usable pattern: an empty alternative inside
    // "(a||b)" would match every user agent.
    if (!is_array($item) || !isset($item['pattern']) || !is_string($item['pattern']) || $item['pattern'] === '') {
        continue;
    }

    // A literal space would end the map key, so spaces are emitted as \s.
    $patterns[] = str_replace(' ', '\s', $item['pattern']);
}

echo "map \$http_user_agent \$limit_bots {\n";
echo " default \"\";\n\n";
foreach (array_chunk($patterns, $patternsPerLine) as $group) {
    echo " ~*(" . implode('|', $group) . ") \$http_user_agent;\n";
}
echo "}\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment