Last active
August 5, 2024 16:02
-
-
Save tolgahanakgun/a4a1cb4ba8ea34b64acb5593f4759592 to your computer and use it in GitHub Desktop.
Return a nice-looking "403 Forbidden" for all HTTP requests to web scrapers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# -*- coding: utf-8 -*- | |
import socket | |
from email.utils import formatdate | |
from ipaddress import ip_address | |
import argparse | |
import socketserver | |
# The "403 Forbidden" page was taken from https://github.com/dr5hn/403 | |
# Response body, encoded once at import time so that its byte length can be
# embedded in the Content-length header below.
HTML = (
    """<!doctypehtml><html lang=en><meta charset=UTF-8><meta content="width=d"""
    """evice-width,initial-scale=1"name=viewport><title>403 Forbidden</title>"""
    """<style>@import url(https://fonts.googleapis.com/css?family=Press+Start"""
    """+2P);body,html{width:100%;height:100%;margin:0}*{font-family:'Press St"""
    """art 2P',cursive;box-sizing:border-box}#app{padding:1rem;background:#00"""
    """0;display:flex;height:100%;justify-content:center;align-items:center;c"""
    """olor:#54fe55;text-shadow:0 0 10px;font-size:6rem;flex-direction:column"""
    """}#app .txt{font-size:1.8rem}@keyframes blink{0%{opacity:0}49%{opacity:"""
    """0}50%{opacity:1}100%{opacity:1}}.blink{animation-name:blink;animation-"""
    """duration:1s;animation-iteration-count:infinite}</style><div id=app><di"""
    """v>403</div><div class=txt>Forbidden<span class=blink>_</span></div></d"""
    """iv>\n"""
).encode("utf-8")

# Status line and header template for every response.
# - HTTP requires CRLF ("\r\n") line terminators and an empty CRLF line
#   between the header section and the body; a bare "\n" is not valid framing.
# - "{date}" is intentionally left as a str.format() placeholder that is filled
#   in per request; only the Content-length line is an f-string, so it is
#   evaluated once here.
headers = (
    "HTTP/1.0 403 Forbidden\r\n"
    "Server: wtf 3.1\r\n"
    "Date: {date}\r\n"
    "Content-type: text/html; charset=utf-8\r\n"
    f"Content-length: {len(HTML)}\r\n"
    "Connection: close\r\n"
    "\r\n"
)
class ForbiddenResponseHandler(socketserver.BaseRequestHandler):
    """Answer every connection with the canned "403 Forbidden" page."""

    def handle(self):
        """Read (and discard) up to 1 KiB of the request, then send the 403 page.

        The response is sent when the client sent some data or when it stayed
        silent for one second. If the peer closed the connection before
        sending anything, nothing is written. Socket errors are reported on
        stdout instead of propagating, and the socket is closed on every path.
        """
        date = formatdate(timeval=None, localtime=False, usegmt=True)
        response = headers.format(date=date).encode("utf-8") + HTML
        try:
            # recv will timeout in 1 sec if no data was received
            self.request.settimeout(1)
            try:
                data = self.request.recv(1024)
            except socket.timeout:
                print(f"Connection from {self.client_address} timed out, sending 403 Response")
            else:
                if not data:
                    # An empty read means the peer has already closed its side.
                    print(f"Client from {self.client_address} disconnected")
                    return
                print(f"Sending 403 Response to {self.client_address}")
            # sendall() keeps writing until the whole response is out; a plain
            # send() may transmit only part of the buffer.
            self.request.sendall(response)
        except OSError as exc:
            # Covers resets, broken pipes and send timeouts. One misbehaving
            # client must not produce a traceback or disturb the server.
            print(f"Connection error with {self.client_address}: {exc}")
        finally:
            self.request.close()
def main() -> None:
    """Parse the command line and serve the 403 page until interrupted.

    Options:
        -b/--bind  IP address (IPv4 or IPv6) to listen on, default 127.0.0.1.
        -p/--port  TCP port to listen on, default 5667.
    """
    parser = argparse.ArgumentParser(description='Return 403 for all incoming packages for web scrapers')
    parser.add_argument("-b", "--bind", default="127.0.0.1", type=ip_address,
                        help='Bind server to this IP')
    parser.add_argument("-p", "--port", default=5667, type=int,
                        help='Bind server to this port')
    args = parser.parse_args()
    is_ipv6 = args.bind.version == 6

    class _Server(socketserver.TCPServer):
        # ip_address() accepts IPv6 literals, but TCPServer creates an AF_INET
        # socket by default, so pick the family that matches the bind address.
        address_family = socket.AF_INET6 if is_ipv6 else socket.AF_INET
        # Allow an immediate restart while old connections are in TIME_WAIT.
        allow_reuse_address = True

    # IPv6 literals must be bracketed inside a URL.
    url_host = f"[{args.bind}]" if is_ipv6 else str(args.bind)
    with _Server((str(args.bind), args.port), ForbiddenResponseHandler) as httpd:
        print(f"Serving HTTP on {args.bind} port {args.port} (http://{url_host}:{args.port}/) ...")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop the server; exit quietly.
            print("\nKeyboard interrupt received, exiting.")
# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # main() returns None, so this exits with status 0 once it returns.
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment