Created
October 29, 2015 17:13
-
-
Save C-Pro/669d2cccd937846a9a47 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| import BaseHTTPServer | |
| import re | |
| import HTMLParser | |
# Origin of the proxied site; request paths are appended to it and links
# pointing at it are rewritten to the local proxy address.
base_url = 'http://habrahabr.ru'
# TCP port the local proxy server is bound to.
port = 4000
class TradeMarker(HTMLParser.HTMLParser):
    """Re-serialize an HTML document, marking every six-character word.

    Pass the markup to ``feed()``; the rewritten document accumulates in
    ``tm_html`` as a unicode string.  While re-emitting the markup the
    parser:

    * appends the trademark sign (U+2122) to every run of exactly six word
      characters in text, except for text that directly follows the start
      tag of a raw element (script, style, meta, object);
    * replaces ``base_url`` inside attribute values with the address of
      the local proxy, so that links keep pointing at the proxy;
    * copies entity and character references, comments, declarations and
      processing instructions through unchanged (the base-class handlers
      ignore them, which would drop them from the output).
    """

    # Start tags whose directly following text must be copied verbatim.
    # 'style' is included because CSS keywords such as "margin" or
    # "border" are six characters long and would otherwise be altered.
    _raw_tags = ('script', 'style', 'meta', 'object')

    # Exactly six word characters with no word character on either side.
    # Look-arounds are used instead of consuming the separators so that
    # adjacent six-character words ("abcdef ghijkl") are both matched.
    _six_char_word = re.compile(r'(?<!\w)(\w{6})(?!\w)', re.UNICODE)

    tm_html = u''
    last_tag = ''

    def __init__(self):
        self.tm_html = u''
        self.last_tag = ''
        # HTMLParser is an old-style class in Python 2, so super() is not
        # available; the base initializer is called explicitly.
        HTMLParser.HTMLParser.__init__(self)

    def _render_attrs(self, attrs):
        """Return ``attrs`` (a list of (name, value) pairs) as markup."""
        proxy_url = 'http://localhost:' + str(port)
        rendered = []
        for name, value in attrs:
            if value is None:
                # Valueless attribute such as "disabled" or "async".
                rendered.append(u' ' + name)
                continue
            value = value.replace(base_url, proxy_url)
            # The parser hands over attribute values with references
            # already decoded, so they are escaped again before being
            # placed between single quotes.
            value = (value.replace(u'&', u'&amp;')
                     .replace(u'<', u'&lt;')
                     .replace(u'>', u'&gt;')
                     .replace(u"'", u'&#39;'))
            rendered.append(u" {}='{}'".format(name, value))
        return u''.join(rendered)

    def handle_starttag(self, tag, attrs):
        """Emit a start tag and remember it for handle_data()."""
        self.last_tag = tag
        self.tm_html += u'<' + tag + self._render_attrs(attrs) + u'>'

    def handle_startendtag(self, tag, attrs):
        """Emit a self-closing tag such as ``<br/>`` as a single tag."""
        # The inherited implementation calls handle_starttag() followed by
        # handle_endtag(), which would turn <br/> into <br></br>.
        self.tm_html += u'<' + tag + self._render_attrs(attrs) + u' />'
        self.last_tag = ''

    def handle_endtag(self, tag):
        """Emit an end tag."""
        self.tm_html += u'</' + tag + u'>'
        # Text following an end tag is no longer raw content, so forget
        # the remembered start tag.
        self.last_tag = ''

    def handle_data(self, data):
        """Emit text, marking six-character words outside raw elements."""
        if self.last_tag not in self._raw_tags:
            data = self._six_char_word.sub(u'\\1\u2122', data)
        self.tm_html += data

    def handle_entityref(self, name):
        """Copy a named reference such as ``&nbsp;`` to the output."""
        self.tm_html += u'&' + name + u';'

    def handle_charref(self, name):
        """Copy a numeric reference such as ``&#8212;`` to the output."""
        self.tm_html += u'&#' + name + u';'

    def handle_comment(self, data):
        """Copy a comment to the output."""
        self.tm_html += u'<!--' + data + u'-->'

    def handle_decl(self, decl):
        """Copy a declaration such as the doctype to the output."""
        self.tm_html += u'<!' + decl + u'>'

    def handle_pi(self, data):
        """Copy a processing instruction to the output."""
        self.tm_html += u'<?' + data + u'>'
class HabraProxy(BaseHTTPServer.BaseHTTPRequestHandler):
    """Simple habraproxy.

    Request handler that fetches the requested path from ``base_url`` and
    relays the response.  HTML responses are passed through TradeMarker
    before being sent; every other content type is relayed byte for byte.
    """

    # Seconds to wait for the upstream server before giving up, so that a
    # stalled upstream cannot block the handler forever.
    upstream_timeout = 30

    def do_HEAD(self):
        """Answer HEAD with a fixed 200 text/html header block.

        The upstream server is not contacted.
        """
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()

    def do_GET(self):
        """Fetch ``self.path`` from the upstream site and relay it."""
        try:
            r = requests.get(base_url + self.path,
                             timeout=self.upstream_timeout)
        except requests.RequestException:
            # Report upstream failures to the client instead of letting
            # the exception escape from the handler.
            self.send_error(502, 'Upstream request failed')
            return

        # The header may be missing; treat that as "not HTML".
        content_type = r.headers.get('content-type', '')
        if content_type.lower().startswith('text/html'):
            parser = TradeMarker()
            parser.feed(r.text)
            body = parser.tm_html.encode('utf-8')
            # The body is always re-encoded as UTF-8, so advertise that
            # rather than whatever charset the upstream declared.
            content_type = 'text/html; charset=utf-8'
        else:
            # Relay the raw bytes: decoding to text and re-encoding would
            # corrupt binary payloads such as images.
            body = r.content

        self.send_response(r.status_code)
        if content_type:
            self.send_header("Content-type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)
if __name__ == '__main__':
    # Script entry point: bind the proxy to 'localhost' on the configured
    # port and keep handling requests with HabraProxy.
    listen_address = ('localhost', port)
    httpd = BaseHTTPServer.HTTPServer(listen_address, HabraProxy)
    httpd.serve_forever()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Сделал добавление (tm) только внутри правильных тегов, чтобы страница нормально открывалась с картинками и скриптами; ссылки теперь тоже ведут на прокси, а не уводят на Хабр.