Last active
February 26, 2016 09:04
-
-
Save hodzanassredin/2fee544ada49bf2ca614 to your computer and use it in GitHub Desktop.
Answer for https://gist.github.com/anonymous/06e0bd519490c8f03404; the correct answer is here: https://gist.github.com/valentjedi/2156782b65cb2b164bd4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
import re

import requests
from bottle import run, route
from bs4 import BeautifulSoup, Comment
# Matches a standalone run of exactly six word characters. re.UNICODE keeps
# \w and \b Unicode-aware on Python 2 as well, so Cyrillic words are matched.
exp = re.compile(r'\b(\w{6})\b', re.UNICODE)


def replace_6chr_word(word):
    """Return ``word`` with a trademark sign appended if it has six characters.

    A word of any other length is returned unchanged.
    """
    if len(word) == 6:
        return word + u"\u2122"
    return word


def replace_all_6char_words(text):
    """Append a trademark sign to every six-character word in ``text``.

    Words are located with the ``exp`` pattern rather than by splitting on
    whitespace, so the original spacing and line breaks are preserved and a
    word that is directly followed by punctuation is still recognised.
    """
    return exp.sub(lambda match: replace_6chr_word(match.group(1)), text)
def visible(element):
    """Tell whether a parsed text node belongs to the page's rendered text.

    Returns False for text whose parent is a ``style``, ``script``, ``head``
    or ``title`` tag or the document itself, and for HTML comments; returns
    True for every other node.
    """
    hidden_parents = ('style', 'script', '[document]', 'head', 'title')
    if element.parent.name in hidden_parents:
        return False
    # bs4 keeps a comment's text without the ``<!--``/``-->`` markers, so a
    # regex over the node text cannot detect it; check the node type instead.
    return not isinstance(element, Comment)
def get_page_and_replace_6char_words(url):
    """Download ``url`` and return its HTML with six-character words marked.

    The page is parsed with the lxml parser, every text node accepted by
    ``visible`` is rewritten through ``replace_all_6char_words``, and the
    resulting document is returned as a prettified string.

    Raises whatever ``requests.get`` raises, including a timeout error when
    the server does not answer within ten seconds.
    """
    # Without an explicit timeout requests waits indefinitely on a stalled host.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, "lxml")
    # find_all gathers the text nodes before the loop starts, so swapping
    # nodes inside the loop does not disturb the iteration.
    for node in soup.find_all(text=True):
        if visible(node):
            node.replace_with(replace_all_6char_words(node))
    return soup.prettify()
@route('<path:path>')
def proxy(path):
    """Serve the habrahabr.ru page at ``path`` with six-character words marked."""
    return get_page_and_replace_6char_words("http://habrahabr.ru" + path)
# Start the bottle server on localhost:8080, but only when this file is run
# as a script rather than imported as a module.
if __name__ == '__main__':
    run(host='localhost', port=8080)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment