Skip to content

Instantly share code, notes, and snippets.

@C-Pro
Created October 29, 2015 17:13
Show Gist options
  • Select an option

  • Save C-Pro/669d2cccd937846a9a47 to your computer and use it in GitHub Desktop.

Select an option

Save C-Pro/669d2cccd937846a9a47 to your computer and use it in GitHub Desktop.
import requests
import BaseHTTPServer
import re
import HTMLParser
# Origin site: request paths are appended to this URL when fetching upstream,
# and occurrences of it in tag attributes are rewritten to the proxy address.
base_url = 'http://habrahabr.ru'
# Local TCP port the proxy server binds to; also used to build the
# 'http://localhost:<port>' address substituted for base_url in attributes.
port = 4000
class TradeMarker(HTMLParser.HTMLParser):
    """Re-serialise HTML, appending a trademark sign to six-letter words.

    Feed markup with ``feed()`` (and finish with ``close()``); the rewritten
    document accumulates in ``tm_html``.  Besides marking words in text
    nodes, every attribute value has ``base_url`` replaced by the local
    proxy address so links keep pointing at the proxy.
    """

    # Class-level defaults kept for backward compatibility; __init__ gives
    # every instance its own values.
    tm_html = u''
    last_tag = ''

    # Text directly following one of these start tags is copied verbatim.
    # 'style' is included so CSS is not corrupted by inserted characters.
    _RAW_TEXT_TAGS = ('script', 'style', 'meta', 'object')

    # A run of exactly six word characters.  Lookarounds are used instead of
    # consuming the neighbouring separators, so two six-letter words split
    # by a single space are both matched.
    _SIX_LETTER_WORD = re.compile(r'(?<!\w)(\w{6})(?!\w)', re.UNICODE)

    def __init__(self):
        self.tm_html = u''
        self.last_tag = ''
        HTMLParser.HTMLParser.__init__(self)

    @staticmethod
    def _format_attr(name, value):
        """Render one attribute, with a leading space, for a start tag.

        ``value`` is ``None`` for valueless attributes (``<input disabled>``),
        which are emitted as the bare name.  Other values are wrapped in
        single quotes; the parser hands values over unescaped, so ``&``,
        ``<`` and ``'`` are escaped again to keep the markup well-formed.
        """
        if value is None:
            return u' ' + name
        escaped = (value.replace(u'&', u'&amp;')
                   .replace(u'<', u'&lt;')
                   .replace(u"'", u'&#39;'))
        return u" {}='{}'".format(name, escaped)

    def _emit_tag(self, tag, attrs, closer):
        """Append ``<tag attr='value' ...`` followed by ``closer``."""
        rendered = u''
        if attrs:
            proxy_root = 'http://localhost:' + str(port)
            for name, value in attrs:
                if value is not None:
                    # Keep links on the proxy instead of the origin site.
                    value = value.replace(base_url, proxy_root)
                rendered += self._format_attr(name, value)
        self.tm_html += u'<' + tag + rendered + closer

    def handle_starttag(self, tag, attrs):
        """Copy a start tag to the output and remember it for handle_data."""
        self.last_tag = tag
        self._emit_tag(tag, attrs, u'>')

    def handle_startendtag(self, tag, attrs):
        """Copy a self-closing tag as ``<tag />``.

        The inherited implementation calls handle_starttag and then
        handle_endtag, which would turn ``<br/>`` into ``<br></br>``.
        """
        self.last_tag = ''
        self._emit_tag(tag, attrs, u' />')

    def handle_endtag(self, tag):
        """Copy an end tag to the output."""
        self.tm_html += u'</' + tag + u'>'
        # Text after a closing tag (e.g. after </script>) is ordinary page
        # text again, so forget the element that was just closed.
        self.last_tag = ''

    def handle_data(self, data):
        """Copy text, marking six-letter words unless inside a raw-text tag."""
        if self.last_tag not in self._RAW_TEXT_TAGS:
            data = self._SIX_LETTER_WORD.sub(u'\\1\u2122', data)
        self.tm_html += data

    # The handlers below pass through constructs that the base class would
    # otherwise discard, so they no longer vanish from the proxied page.

    def handle_entityref(self, name):
        """Copy a named reference such as ``&nbsp;`` when reported separately."""
        self.tm_html += u'&' + name + u';'

    def handle_charref(self, name):
        """Copy a numeric reference such as ``&#8212;`` when reported separately."""
        self.tm_html += u'&#' + name + u';'

    def handle_comment(self, data):
        """Copy an HTML comment unchanged."""
        self.tm_html += u'<!--' + data + u'-->'

    def handle_decl(self, decl):
        """Copy a declaration such as ``<!DOCTYPE html>`` unchanged."""
        self.tm_html += u'<!' + decl + u'>'

    def handle_pi(self, data):
        """Copy a processing instruction unchanged."""
        self.tm_html += u'<?' + data + u'>'
class HabraProxy(BaseHTTPServer.BaseHTTPRequestHandler):
    """Simple habraproxy.

    Relays GET requests to ``base_url``; HTML responses are rewritten by
    TradeMarker, everything else is passed through byte-for-byte.
    """

    def do_HEAD(self):
        """Reply with a fixed 200 / text/html header set; nothing is fetched."""
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()

    def do_GET(self):
        """Fetch ``base_url + self.path`` and relay the (possibly rewritten) body."""
        try:
            # A timeout keeps a stalled upstream from blocking the server
            # forever (the server handles one request at a time).
            upstream = requests.get(base_url + self.path, timeout=30)
        except requests.RequestException:
            self.send_error(502, 'Upstream request failed')
            return

        # The header may be absent; treat that as "not HTML".
        content_type = upstream.headers.get('content-type', '')
        if content_type.lower().startswith('text/html'):
            parser = TradeMarker()
            parser.feed(upstream.text)
            # Flush any text the parser is still buffering.
            parser.close()
            body = parser.tm_html.encode('utf-8')
            # The body is re-encoded as UTF-8 regardless of the upstream
            # charset, so advertise that instead of the original header.
            content_type = 'text/html; charset=utf-8'
        else:
            # Raw bytes: decoding and re-encoding would corrupt images and
            # other binary payloads.
            body = upstream.content

        self.send_response(upstream.status_code)
        if content_type:
            self.send_header("Content-type", content_type)
        self.end_headers()
        self.wfile.write(body)
if __name__ == '__main__':
    # Bind to localhost only and serve until interrupted.
    httpd = BaseHTTPServer.HTTPServer(('localhost', port), HabraProxy)
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the proxy; exit without a traceback.
        pass
    finally:
        # Release the listening socket so the port can be reused immediately.
        httpd.server_close()
@C-Pro
Copy link
Author

C-Pro commented Oct 29, 2015

Сделал добавление (tm) только внутри правильных тегов, чтобы страница нормально открывалась с картинками и скриптами и ссылки тоже вели на прокси, а не уводили на хабр.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment