Skip to content

Instantly share code, notes, and snippets.

@radiuscz
Last active November 1, 2024 09:43
Show Gist options
  • Save radiuscz/6287438a2089f886c5badafc59e2c359 to your computer and use it in GitHub Desktop.
Save radiuscz/6287438a2089f886c5badafc59e2c359 to your computer and use it in GitHub Desktop.
VuFind - přetížení roboty Semrush, Ahrefs, BLEX
#!/bin/bash
# Insert a crawler block list into every VuFind Apache config, directly above
# the Shibboleth marker comment that the sed pattern below searches for.
#
# The inserted text consists of three directives:
#   BrowserMatchNoCase <bot1|bot2|...> bad_bot
#   Order Deny,Allow
#   Deny from env=bad_bot
#
# The variable name "bad_bot" must be the last token of the BrowserMatchNoCase
# line, after the complete alternation. If it appears in the middle of the
# list, everything following it becomes part of the variable name, so the
# variable "bad_bot" is never set and "Deny from env=bad_bot" never applies.
#
# Relies on GNU sed: -i edits the files in place and \n in the replacement
# produces a newline.
# NOTE(review): not idempotent - a second run inserts the block a second time.
sed -i 's/# Uncomment the following lines/BrowserMatchNoCase SemrushBot|AhrefsBot|BLEXBot|AspiegelBot|SEOkicks|PetalBot|YandexBot|bingbot|DotBot|MJ12bot|Seekport|Adsbot|Amazonbot|DataForSeoBot|seoscanners|uni-leipzig|Turnitin|Bytespider|Barkrowler|serpstatbot|ImagesiftBot|ClaudeBot|facebookexternalhit|FriendlyCrawler|GPTBot|meta-externalagent|AwarioBot bad_bot\n Order Deny,Allow\n Deny from env=bad_bot\n\n # Uncomment the following lines/' /usr/local/vufind2/local-*/httpd-vufind.conf
#!/bin/bash
# Extend an already-installed crawler block list with additional bots.
#
# Every line that mentions DataForSeoBot gets the new bots appended right
# after that name, separated by "|" so they join the existing alternation.
# Targets: the VuFind local Apache configs and the enabled Apache sites.

# sed program shared by both targets:
#  - the address "/DataForSeoBot|seoscanners/!" skips lines that were already
#    extended ("|" is a literal character in sed basic regular expressions),
#    so running the script again does not duplicate the list;
#  - "&" in the replacement stands for the matched text, i.e. DataForSeoBot.
# Kept in single quotes so "!" and "&" reach sed untouched.
readonly extend_list='/DataForSeoBot|seoscanners/!s/DataForSeoBot/&|seoscanners|uni-leipzig|Turnitin|Bytespider|Barkrowler|serpstatbot|ImagesiftBot|ClaudeBot|facebookexternalhit|FriendlyCrawler|GPTBot|meta-externalagent|AwarioBot/'

# Print the current block-list lines so the operator can see which files
# carry one before anything is changed.
grep Semrush /usr/local/vufind2/local-*/httpd-vufind.conf
grep Semrush /etc/apache2/sites-enabled/*.conf

# --follow-symlinks (GNU sed): edit the file a symlink points to. Without it,
# "sed -i" replaces the symlink itself with a regular file, which would
# detach entries in sites-enabled from their link targets.
sed -i --follow-symlinks -e "$extend_list" /usr/local/vufind2/local-*/httpd-vufind.conf
sed -i --follow-symlinks -e "$extend_list" /etc/apache2/sites-enabled/*.conf
# Crawler block list (Apache, environment-variable form).
# A case-insensitive match of the User-Agent header against the alternation
# below sets the environment variable named by the last token, bad_bot.
BrowserMatchNoCase SemrushBot|AhrefsBot|BLEXBot|AspiegelBot|SEOkicks|PetalBot|YandexBot|bingbot|DotBot|MJ12bot|Seekport|Adsbot|Amazonbot|DataForSeoBot|seoscanners|uni-leipzig|Turnitin|Bytespider|Barkrowler|serpstatbot|ImagesiftBot|ClaudeBot|facebookexternalhit|FriendlyCrawler|GPTBot|meta-externalagent|AwarioBot bad_bot
# Deny every request for which bad_bot is set.
# NOTE(review): Order/Deny is the Apache 2.2-style access syntax; on Apache 2.4
# it presumably depends on mod_access_compat being loaded - confirm.
Order Deny,Allow
Deny from env=bad_bot
# Uncomment the following lines, if you wish to use the Shibboleth authentication
#AuthType shibboleth
#require shibboleth
# Crawler block list (Apache mod_rewrite, one condition per bot).
# Each line tests the User-Agent header case-insensitively ([NC]); the [OR]
# flag chains the conditions so that any single match satisfies the set. The
# last condition carries no OR, which ends the chain.
# The leading "^.*" (and the trailing ".*$" on the AhrefsBot line) do not
# narrow the match: each condition is effectively a substring test.
# NOTE(review): this excerpt contains no RewriteRule; the conditions only act
# on the RewriteRule that follows them - confirm one exists in the real config.
RewriteCond %{HTTP_USER_AGENT} ^.*SemrushBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*AhrefsBot.*$ [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*BLEXBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*AspiegelBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*SEOkicks [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*PetalBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*YandexBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*bingbot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*DotBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*MJ12bot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Seekport [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Adsbot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Amazonbot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*DataForSeoBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*seoscanners [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*uni-leipzig [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Turnitin [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Bytespider [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Barkrowler [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*serpstatbot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*ImagesiftBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*ClaudeBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*facebookexternalhit [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*FriendlyCrawler [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*GPTBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*meta-externalagent [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*AwarioBot [NC]
# Crawler block list (Apache mod_rewrite, single-condition form).
# The enclosed directives are only processed when mod_rewrite is loaded.
# NOTE(review): RewriteEngine is not switched on inside this block; presumably
# it is enabled elsewhere in the configuration - confirm.
<IfModule mod_rewrite.c>
# One case-insensitive ([NC]) test of the User-Agent header against the whole
# alternation of bot names.
RewriteCond %{HTTP_USER_AGENT} (SemrushBot|AhrefsBot|BLEXBot|AspiegelBot|SEOkicks|PetalBot|YandexBot|bingbot|DotBot|MJ12bot|Seekport|Adsbot|Amazonbot|DataForSeoBot|seoscanners|uni-leipzig|Turnitin|Bytespider|Barkrowler|serpstatbot|ImagesiftBot|ClaudeBot|facebookexternalhit|FriendlyCrawler|GPTBot|meta-externalagent|AwarioBot) [NC]
# Applies to every request path (.*); "-" leaves the URL unchanged and
# R=403 answers with HTTP status 403. L stops further rule processing.
RewriteRule .* - [R=403,L]
</IfModule>
# Crawler block list (nginx), server-level part: reject flagged requests.
# The "..." / "...." values are placeholders for the site's real settings.
server {
listen ...;
server_name ....;
location / {
# $limit_bots is not assigned inside this block; it comes from the
# "map $http_user_agent $limit_bots" defined in the http block.
if ($limit_bots = 1) {
# Flagged crawlers receive 404 here (the Apache mod_rewrite variant in this
# file answers 403 instead).
return 404;
}
}
}
# Crawler block list (nginx), http-level part: classify requests by User-Agent.
http {
# Derive $limit_bots from the User-Agent header: 1 for a listed crawler,
# 0 (the default) for everything else.
map $http_user_agent $limit_bots {
default 0;
# Each "~*" entry is a case-insensitive regular expression, one per bot.
~*(SemrushBot) 1;
~*(AhrefsBot) 1;
~*(BLEXBot) 1;
~*(AspiegelBot) 1;
~*(SEOkicks) 1;
~*(PetalBot) 1;
~*(YandexBot) 1;
~*(bingbot) 1;
~*(DotBot) 1;
~*(MJ12bot) 1;
~*(Seekport) 1;
~*(Adsbot) 1;
~*(Amazonbot) 1;
~*(DataForSeoBot) 1;
~*(seoscanners) 1;
~*(uni-leipzig) 1;
~*(Turnitin) 1;
~*(Bytespider) 1;
~*(Barkrowler) 1;
~*(serpstatbot) 1;
~*(ImagesiftBot) 1;
~*(ClaudeBot) 1;
~*(facebookexternalhit) 1;
~*(FriendlyCrawler) 1;
~*(GPTBot) 1;
~*(meta-externalagent) 1;
~*(AwarioBot) 1;
}
# Placeholder for the remainder of the http configuration.
...
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment