Skip to content

Instantly share code, notes, and snippets.

@radiuscz
Last active February 28, 2025 10:30
Show Gist options
  • Select an option

  • Save radiuscz/6287438a2089f886c5badafc59e2c359 to your computer and use it in GitHub Desktop.

Select an option

Save radiuscz/6287438a2089f886c5badafc59e2c359 to your computer and use it in GitHub Desktop.
VuFind - přetížení roboty Semrush, Ahrefs, BLEX
#!/bin/bash
# Install the bad-bot User-Agent block into every local VuFind Apache config.
#
# The generated rule is inserted directly above the marker line
# "# Uncomment the following lines", which is re-emitted so it stays in place.
#
# Notes on the sed replacement text:
#   * "\/"  yields "/"   (the slash is also the s/// delimiter)
#   * "\\." yields "\."  (a literal dot in the resulting Apache regex);
#     a bare "\." would lose its backslash and become "any character"
#   * "\n"  yields a newline (GNU sed)
# The regex is emitted inside "(...)" quotes because it contains spaces, and
# the environment variable name bad_bot follows it as the second argument of
# BrowserMatchNoCase.
for conf in /usr/local/vufind2/local-*/httpd-vufind.conf; do
  # An unmatched glob is left as a literal string; report it and move on.
  if [ ! -f "$conf" ]; then
    echo "skipping $conf: not a regular file" >&2
    continue
  fi
  # Already patched: inserting again would stack a duplicate rule.
  if grep -q 'env=bad_bot' "$conf"; then
    continue
  fi
  sed -i 's/# Uncomment the following lines/BrowserMatchNoCase "(SemrushBot|AhrefsBot|BLEXBot|AspiegelBot|SEOkicks|PetalBot|YandexBot|bingbot|DotBot|MJ12bot|Seekport|Adsbot|Amazonbot|DataForSeoBot|seoscanners|uni-leipzig|Turnitin|Bytespider|Barkrowler|serpstatbot|ImagesiftBot|ClaudeBot|facebookexternalhit|FriendlyCrawler|GPTBot|meta-externalagent|AwarioBot|MSIE|Googlebot|Firefox\/[0-9][0-9]?\\.|PPC Mac|Windows NT [4-6]\\.|Windows CE|Windows 9.|Chrome\/[1-7][0-9]\\.|Opera|AliyunSecBot|Owler|iPhone OS [0-9]_|Mac OS X 10_[0-9]_|iPad OS 1[0-3]_|netEstate NE Crawler|iPad OS [0-9]_|__RBIT__)" bad_bot\n Order Deny,Allow\n Deny from env=bad_bot\n\n # Uncomment the following lines/' "$conf"
done
#!/bin/bash
# Extend an already-installed bad-bot list in the enabled Apache configs by
# appending the newer User-Agent patterns right after "DataForSeoBot".
#
# The grep calls print the lines that currently mention Semrush so the
# operator can see what is about to be edited.
# grep Semrush /usr/local/vufind2/local-*/httpd-vufind.conf
grep Semrush /etc/apache2/conf-enabled/vufind*.conf
grep Semrush /etc/apache2/sites-enabled/*.conf

# One sed program shared by both targets.
#   * The address "/DataForSeoBot|seoscanners/!" skips lines that already carry
#     the extension ("|" is a literal character in a basic regex), so running
#     the script again does not append the list a second time.
#   * In the replacement, "\/" yields "/" and "\\." yields "\." (literal dot);
#     a bare "\." would be written out as an unescaped ".".
# NOTE(review): the appended patterns contain spaces, so the target directive
# must have its regex in double quotes - confirm before running.
append_bots='/DataForSeoBot|seoscanners/!s/DataForSeoBot/DataForSeoBot|seoscanners|uni-leipzig|Turnitin|Bytespider|Barkrowler|serpstatbot|ImagesiftBot|ClaudeBot|facebookexternalhit|FriendlyCrawler|GPTBot|meta-externalagent|AwarioBot|MSIE|Googlebot|Firefox\/[0-9][0-9]?\\.|PPC Mac|Windows NT [4-6]\\.|Windows CE|Windows 9.|Chrome\/[1-7][0-9]\\.|Opera|AliyunSecBot|Owler|iPhone OS [0-9]_|Mac OS X 10_[0-9]_|iPad OS 1[0-3]_|netEstate NE Crawler|iPad OS [0-9]_|__RBIT__/'
sed -i "$append_bots" /etc/apache2/conf-enabled/vufind*.conf
sed -i "$append_bots" /etc/apache2/sites-enabled/*.conf
# Set the environment variable bad_bot for every request whose User-Agent
# matches one of the alternatives below (case-insensitive), then deny those
# requests. Besides named crawlers the list covers outdated browser/OS
# signatures: Firefox with a one- or two-digit major version, Chrome 10-79,
# Windows NT 4-6, old iOS/macOS releases, and so on.
# Dots that must match literally are written "\."; the earlier "\\\." form
# demanded a backslash inside the User-Agent and therefore never matched.
BrowserMatchNoCase "(SemrushBot|AhrefsBot|BLEXBot|AspiegelBot|SEOkicks|PetalBot|YandexBot|bingbot|DotBot|MJ12bot|Seekport|Adsbot|Amazonbot|DataForSeoBot|seoscanners|uni-leipzig|Turnitin|Bytespider|Barkrowler|serpstatbot|ImagesiftBot|ClaudeBot|facebookexternalhit|FriendlyCrawler|GPTBot|meta-externalagent|AwarioBot|MSIE|Googlebot|Firefox\/[0-9][0-9]?\.|PPC Mac|Windows NT [4-6]\.|Windows CE|Windows 9.|Chrome\/[1-7][0-9]\.|Opera|AliyunSecBot|Owler|iPhone OS [0-9]_|Mac OS X 10_[0-9]_|iPad OS 1[0-3]_|netEstate NE Crawler|iPad OS [0-9]_|__RBIT__)" bad_bot
# Apache 2.2-style access control; on Apache 2.4 these two directives are
# provided by mod_access_compat.
Order Deny,Allow
Deny from env=bad_bot
# Uncomment the following lines, if you wish to use the Shibboleth authentication
#AuthType shibboleth
#require shibboleth
# One RewriteCond per unwanted User-Agent signature, chained with OR so that
# any single match satisfies the condition set. NC makes the match
# case-insensitive. Spaces inside a pattern are escaped as "\ " and the flag
# list must be separated from the pattern by whitespace.
# The final condition carries no OR; the RewriteRule these conditions apply to
# has to follow it directly.
RewriteCond %{HTTP_USER_AGENT} ^.*SemrushBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*AhrefsBot.*$ [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*BLEXBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*AspiegelBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*SEOkicks [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*PetalBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*YandexBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*bingbot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*DotBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*MJ12bot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Seekport [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Adsbot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Amazonbot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*DataForSeoBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*seoscanners [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*uni-leipzig [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Turnitin [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Bytespider [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Barkrowler [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*serpstatbot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*ImagesiftBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*ClaudeBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*facebookexternalhit [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*FriendlyCrawler [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*GPTBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*meta-externalagent [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*AwarioBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*MSIE [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Googlebot [NC,OR]
# Firefox with a one- or two-digit major version ("\." is a literal dot).
RewriteCond %{HTTP_USER_AGENT} ^.*Firefox\/[0-9][0-9]?\. [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*PPC\ Mac [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Windows\ NT\ [4-6]\. [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Windows\ CE [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Windows\ 9. [NC,OR]
# Chrome major versions 10-79.
RewriteCond %{HTTP_USER_AGENT} ^.*Chrome\/[1-7][0-9]\. [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Opera [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*AliyunSecBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Owler [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*iPhone\ OS\ [0-9]_ [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*Mac\ OS\ X\ 10_[0-9]_ [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*iPad\ OS\ 1[0-3]_ [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*netEstate\ NE\ Crawler [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^.*iPad\ OS\ [0-9]_ [NC]
Googlebot
<IfModule mod_rewrite.c>
# Answer 403 to every request whose User-Agent matches one of the listed
# crawler or outdated-browser signatures (NC = case-insensitive).
# Literal dots are written "\."; the earlier "\\\." form required a backslash
# inside the User-Agent, so the Firefox/Chrome version checks never fired.
RewriteCond %{HTTP_USER_AGENT} "(SemrushBot|AhrefsBot|BLEXBot|AspiegelBot|SEOkicks|PetalBot|YandexBot|bingbot|DotBot|MJ12bot|Seekport|Adsbot|Amazonbot|DataForSeoBot|seoscanners|uni-leipzig|Turnitin|Bytespider|Barkrowler|serpstatbot|ImagesiftBot|ClaudeBot|facebookexternalhit|FriendlyCrawler|GPTBot|meta-externalagent|AwarioBot|MSIE|Googlebot|Firefox\/[0-9][0-9]?\.|PPC Mac|Windows NT [4-6]\.|Windows CE|Windows 9.|Chrome\/[1-7][0-9]\.|Opera|AliyunSecBot|Owler|iPhone OS [0-9]_|Mac OS X 10_[0-9]_|iPad OS 1[0-3]_|netEstate NE Crawler|iPad OS [0-9]_|__RBIT__)" [NC]
RewriteRule .* - [R=403,L]
</IfModule>
server {
    # "..." marks values to be filled in for the actual site.
    listen ...;
    server_name ....;

    location / {
        # $limit_bots is expected to come from a map on $http_user_agent
        # defined in the http context; the value 1 marks an unwanted client.
        if ($limit_bots = 1) {
            return 404;
        }
    }
}
http {
    # Derive $limit_bots from the User-Agent header: 1 for unwanted crawlers
    # and outdated browser/OS signatures, 0 for everything else.
    # "~*" starts a case-insensitive regular expression.
    # Patterns that contain spaces are enclosed in double quotes; unquoted,
    # each space would split the line into extra map parameters.
    # Literal dots are written "\."; the earlier "\\\." form required a
    # backslash inside the User-Agent and therefore never matched.
    map $http_user_agent $limit_bots {
        default 0;
        ~*(SemrushBot) 1;
        ~*(AhrefsBot) 1;
        ~*(BLEXBot) 1;
        ~*(AspiegelBot) 1;
        ~*(SEOkicks) 1;
        ~*(PetalBot) 1;
        ~*(YandexBot) 1;
        ~*(bingbot) 1;
        ~*(DotBot) 1;
        ~*(MJ12bot) 1;
        ~*(Seekport) 1;
        ~*(Adsbot) 1;
        ~*(Amazonbot) 1;
        ~*(DataForSeoBot) 1;
        ~*(seoscanners) 1;
        ~*(uni-leipzig) 1;
        ~*(Turnitin) 1;
        ~*(Bytespider) 1;
        ~*(Barkrowler) 1;
        ~*(serpstatbot) 1;
        ~*(ImagesiftBot) 1;
        ~*(ClaudeBot) 1;
        ~*(facebookexternalhit) 1;
        ~*(FriendlyCrawler) 1;
        ~*(GPTBot) 1;
        ~*(meta-externalagent) 1;
        ~*(AwarioBot) 1;
        ~*(MSIE) 1;
        ~*(Googlebot) 1;
        ~*(Firefox\/[0-9][0-9]?\.) 1;
        "~*(PPC Mac)" 1;
        "~*(Windows NT [4-6]\.)" 1;
        "~*(Windows CE)" 1;
        "~*(Windows 9.)" 1;
        ~*(Chrome\/[1-7][0-9]\.) 1;
        ~*(Opera) 1;
        ~*(AliyunSecBot) 1;
        ~*(Owler) 1;
        "~*(iPhone OS [0-9]_)" 1;
        "~*(Mac OS X 10_[0-9]_)" 1;
        "~*(iPad OS 1[0-3]_)" 1;
        "~*(netEstate NE Crawler)" 1;
        "~*(iPad OS [0-9]_)" 1;
    }
...
}
@radiuscz
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment