Skip to content

Instantly share code, notes, and snippets.

@hodzanassredin
Last active February 26, 2016 09:04
Show Gist options
  • Save hodzanassredin/2fee544ada49bf2ca614 to your computer and use it in GitHub Desktop.
Save hodzanassredin/2fee544ada49bf2ca614 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import requests
from bottle import run, route
from bs4 import BeautifulSoup
import re
# Pattern for a standalone run of exactly six ``\w`` characters delimited by
# ``\b`` word boundaries; the six characters are captured as group 1.
# NOTE(review): no function visible in this file references ``exp`` -- confirm
# whether it is still needed.
exp = re.compile(r'\b(\w{6})\b')
def replace_6chr_word(word):
    """Return ``word`` with a trademark sign appended if it has six characters.

    Strings of any other length are returned unchanged.
    """
    has_six_chars = len(word) == 6
    return word + u"\u2122" if has_six_chars else word
# Exactly six word characters delimited by word boundaries. re.UNICODE makes
# \w and \b Unicode-aware on Python 2 as well (it is already the default for
# str patterns on Python 3), so non-ASCII words are matched too.
_SIX_CHAR_WORD = re.compile(r'\b\w{6}\b', re.UNICODE)


def replace_all_6char_words(text):
    """Append a trademark sign (U+2122) to every six-character word in ``text``.

    The substitution is done in place with a regular expression, so all
    whitespace (leading, trailing, repeated, newlines) is preserved exactly.
    A word is a maximal run of ``\\w`` characters, so adjacent punctuation is
    not counted towards the length: ``"python,"`` becomes ``"python\u2122,"``
    while ``"hello!"`` is left alone.

    Args:
        text: The string to process.

    Returns:
        A new string with the sign inserted after each six-character word.
    """
    return _SIX_CHAR_WORD.sub(lambda match: match.group(0) + u"\u2122", text)
def visible(element):
    """Return True when a text node should be treated as visible page text.

    A node is considered invisible when its parent is one of the non-rendered
    containers listed below, or when the node itself is an HTML comment.

    Args:
        element: A text node from a BeautifulSoup tree; it must expose
            ``element.parent.name``.

    Returns:
        False for text in non-rendered containers and for comments, True
        otherwise.
    """
    if element.parent.name in ['style', 'script', '[document]', 'head', 'title']:
        return False
    # Imported lazily because only the type check below needs it; bs4 is
    # already a dependency of this file.
    from bs4 import Comment
    # The string value of a bs4 Comment does not include the "<!--" / "-->"
    # delimiters, so matching them with a regex never detects a comment.
    # Checking the node type works, and also avoids the Python 2-only
    # ``unicode`` builtin.
    return not isinstance(element, Comment)
def get_page_and_replace_6char_words(url, timeout=10):
    """Download ``url`` and return its HTML with six-character words marked.

    The page is parsed with BeautifulSoup (lxml parser); every text node
    accepted by ``visible`` is replaced with the result of
    ``replace_all_6char_words``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument. Defaults to 10.

    Returns:
        The prettified markup of the modified document.
    """
    # Without a timeout requests waits indefinitely on an unresponsive host,
    # which would hang the calling request handler.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "lxml")
    # Collect the visible nodes up front so that replacing nodes is not
    # interleaved with filtering the search results.
    visible_nodes = [node for node in soup.findAll(text=True) if visible(node)]
    for node in visible_nodes:
        node.replaceWith(replace_all_6char_words(node))
    return soup.prettify()
@route('<path:path>')
def proxy(path):
    """Serve the habrahabr.ru page at ``path`` with six-character words marked.

    The requested path is appended to the upstream host and the resulting URL
    is passed to ``get_page_and_replace_6char_words``; its return value is the
    response body.
    """
    return get_page_and_replace_6char_words("http://habrahabr.ru" + path)
if __name__ == '__main__':
    # Start the bottle server on localhost:8080 when executed as a script.
    run(port=8080, host='localhost')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment