ANtlord · August 29, 2015 14:22
diff --git a/gistfile1.py b/gistfile1.py
 #!/usr/bin/python2
 # -*- coding: utf-8 -*-
 import SimpleHTTPServer
 import SocketServer
 import signal
 import sys
 import urllib2
 import gzip
 import zlib
 import webbrowser
 import os
 from bs4 import BeautifulSoup
 from bs4.element import NavigableString
 from StringIO import StringIO


 # Port the local proxy listens on.  main() replaces it with the first
 # command-line argument when one is given, and interpret_data() reads it
 # when it re-points page links to http://localhost:<port>/.
 port = 8000


 def get_url_data(url):
     """Fetch ``url`` and return the response body, gunzipped if needed.

     The body is decompressed when the response's Content-Encoding header
     names gzip (``gzip`` or ``x-gzip``, compared case-insensitively);
     otherwise it is returned exactly as it was read.  Errors raised by
     ``urllib2.urlopen`` are not caught here and propagate to the caller.

     :type url: str
     :return: str
     """
     res = urllib2.urlopen(url)
     try:
         raw = res.read()
         encoding = res.info().get('Content-Encoding') or ''
     finally:
         # Release the connection even when reading the body fails.
         res.close()

     if encoding.strip().lower() not in ('gzip', 'x-gzip'):
         return raw

     f = gzip.GzipFile(fileobj=StringIO(raw))
     try:
         return f.read()
     finally:
         f.close()


 def interpret_data(data):
     """Parse a fetched page, mark every sixth word and re-point its URLs.

     Three rewrites are applied to the parsed document:

     * In text nodes that sit directly inside any tag other than
       ``script``, ``noscript`` and ``style``, the trade mark sign is
       appended to every sixth non-blank word.  Words are split on single
       spaces and the count runs across the whole document.
     * Root-relative ``src`` of ``script`` tags and ``href`` of ``link``
       tags (``/path``) are made absolute on ``http://habrahabr.ru``.
       Protocol-relative values (``//host/path``) already name a host and
       are left untouched.
     * ``href`` of ``a`` tags pointing at ``http://habrahabr.ru/`` is
       redirected to ``http://localhost:<port>/`` using the module-level
       ``port``.

     :type data: unicode
     :return: the modified ``BeautifulSoup`` document
     """
     soup = BeautifulSoup(data)
     counter = 1

     for item in soup.find_all(True):
         # Exact type match on purpose: subclasses of NavigableString such
         # as Comment must not be treated as page text.
         strings = [x for x in item.contents if type(x) is NavigableString]
         if item.name not in ('script', 'noscript', 'style') and strings:
             for string in strings:
                 words = string.split(' ')
                 # Index by position in ``words``: the split can contain
                 # empty or whitespace-only items that are not counted as
                 # words but still occupy a slot in the list.
                 for index, word in enumerate(words):
                     if not word.strip():
                         continue
                     if counter % 6 == 0:
                         words[index] = word + u'™'
                     counter += 1

                 string.replace_with(' '.join(words))

         elif item.name in ('script', 'link'):
             key = 'src' if item.name == 'script' else 'href'
             path = item.attrs.get(key)

             if path and path.startswith('/') and not path.startswith('//'):
                 item.attrs[key] = 'http://habrahabr.ru' + path

     # Keep navigation inside the proxy: send site links to the local server.
     for item in soup.find_all('a'):
         if 'href' in item.attrs:
             item.attrs['href'] = item.attrs['href'].replace(
                 'http://habrahabr.ru/', 'http://localhost:%s/' % port)
     return soup


 class MyHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
     """Request handler that serves rewritten habrahabr.ru pages.

     Every GET is answered with the page found at the same path on
     ``http://habrahabr.ru`` after it went through ``interpret_data``.
     """

     def get_habradata(self):
         """Fetch the requested path upstream and return the rewritten page.

         :return: the document produced by ``interpret_data``
         """
         url = u'http://habrahabr.ru%s' % self.path
         return interpret_data(get_url_data(url))

     def do_GET(self):
         """Answer a GET with the rewritten upstream page.

         An upstream HTTP error is relayed with its own status code; any
         other ``urllib2.URLError`` is reported as 502.
         """
         # Fetch before writing any header, so that a failed upstream request
         # can still be reported with an error status instead of a 200 that
         # has no body.
         try:
             res = self.get_habradata()
         except urllib2.HTTPError as err:
             # HTTPError is a URLError subclass, so it has to be caught first.
             self.send_error(err.code)
             return
         except urllib2.URLError:
             self.send_error(502)
             return

         self.send_response(200, 'OK')
         self.send_header('Content-type', 'text/html; charset=utf8')
         self.end_headers()
         # Serialize explicitly to UTF-8 bytes, matching the charset
         # announced in the header above.
         self.wfile.write(res.encode('utf-8'))


 def main():
     """Start the proxy on 127.0.0.1 and open it in a browser tab.

     The first command-line argument, when present, is parsed with
     ``int`` and replaces the module-level ``port``.
     """
     # ``port`` is module state because interpret_data() reads it as well
     # when it rewrites links; the original author flagged this as a known
     # bad practice.
     global port
     if sys.argv[1:]:
         port = int(sys.argv[1])

     # Bind before opening the browser: once the socket is listening the
     # browser's first request waits for serve_forever() instead of being
     # refused, and no tab is opened when the port cannot be bound.
     httpd = SocketServer.TCPServer(('127.0.0.1', port), MyHandler)
     try:
         webbrowser.open_new_tab('http://localhost:%s' % port)
         httpd.serve_forever()
     finally:
         # Release the listening socket however the loop ends.
         httpd.server_close()


 # Start the proxy only when this file is executed as a script.
 if __name__ == '__main__':
    main()
	#!/usr/bin/python2
	# -*- coding: utf-8 -*-
	import SimpleHTTPServer
	import SocketServer
	import signal
	import sys
	import urllib2
	import gzip
	import zlib
	import webbrowser
	import os
	from bs4 import BeautifulSoup
	from bs4.element import NavigableString
	from StringIO import StringIO


	# Port the local proxy listens on.  main() replaces it with the first
	# command-line argument when one is given, and interpret_data() reads it
	# when it re-points page links to http://localhost:<port>/.
	port = 8000


	def get_url_data(url):
	    """Fetch ``url`` and return the response body, gunzipped if needed.

	    The body is decompressed when the response's Content-Encoding header
	    names gzip (``gzip`` or ``x-gzip``, compared case-insensitively);
	    otherwise it is returned exactly as it was read.  Errors raised by
	    ``urllib2.urlopen`` are not caught here and propagate to the caller.

	    :type url: str
	    :return: str
	    """
	    res = urllib2.urlopen(url)
	    try:
	        raw = res.read()
	        encoding = res.info().get('Content-Encoding') or ''
	    finally:
	        # Release the connection even when reading the body fails.
	        res.close()

	    if encoding.strip().lower() not in ('gzip', 'x-gzip'):
	        return raw

	    f = gzip.GzipFile(fileobj=StringIO(raw))
	    try:
	        return f.read()
	    finally:
	        f.close()


	def interpret_data(data):
	    """Parse a fetched page, mark every sixth word and re-point its URLs.

	    Three rewrites are applied to the parsed document:

	    * In text nodes that sit directly inside any tag other than
	      ``script``, ``noscript`` and ``style``, the trade mark sign is
	      appended to every sixth non-blank word.  Words are split on single
	      spaces and the count runs across the whole document.
	    * Root-relative ``src`` of ``script`` tags and ``href`` of ``link``
	      tags (``/path``) are made absolute on ``http://habrahabr.ru``.
	      Protocol-relative values (``//host/path``) already name a host and
	      are left untouched.
	    * ``href`` of ``a`` tags pointing at ``http://habrahabr.ru/`` is
	      redirected to ``http://localhost:<port>/`` using the module-level
	      ``port``.

	    :type data: unicode
	    :return: the modified ``BeautifulSoup`` document
	    """
	    soup = BeautifulSoup(data)
	    counter = 1

	    for item in soup.find_all(True):
	        # Exact type match on purpose: subclasses of NavigableString such
	        # as Comment must not be treated as page text.
	        strings = [x for x in item.contents if type(x) is NavigableString]
	        if item.name not in ('script', 'noscript', 'style') and strings:
	            for string in strings:
	                words = string.split(' ')
	                # Index by position in ``words``: the split can contain
	                # empty or whitespace-only items that are not counted as
	                # words but still occupy a slot in the list.
	                for index, word in enumerate(words):
	                    if not word.strip():
	                        continue
	                    if counter % 6 == 0:
	                        words[index] = word + u'™'
	                    counter += 1

	                string.replace_with(' '.join(words))

	        elif item.name in ('script', 'link'):
	            key = 'src' if item.name == 'script' else 'href'
	            path = item.attrs.get(key)

	            if path and path.startswith('/') and not path.startswith('//'):
	                item.attrs[key] = 'http://habrahabr.ru' + path

	    # Keep navigation inside the proxy: send site links to the local server.
	    for item in soup.find_all('a'):
	        if 'href' in item.attrs:
	            item.attrs['href'] = item.attrs['href'].replace(
	                'http://habrahabr.ru/', 'http://localhost:%s/' % port)
	    return soup


	class MyHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
	    """Request handler that serves rewritten habrahabr.ru pages.

	    Every GET is answered with the page found at the same path on
	    ``http://habrahabr.ru`` after it went through ``interpret_data``.
	    """

	    def get_habradata(self):
	        """Fetch the requested path upstream and return the rewritten page.

	        :return: the document produced by ``interpret_data``
	        """
	        url = u'http://habrahabr.ru%s' % self.path
	        return interpret_data(get_url_data(url))

	    def do_GET(self):
	        """Answer a GET with the rewritten upstream page.

	        An upstream HTTP error is relayed with its own status code; any
	        other ``urllib2.URLError`` is reported as 502.
	        """
	        # Fetch before writing any header, so that a failed upstream request
	        # can still be reported with an error status instead of a 200 that
	        # has no body.
	        try:
	            res = self.get_habradata()
	        except urllib2.HTTPError as err:
	            # HTTPError is a URLError subclass, so it has to be caught first.
	            self.send_error(err.code)
	            return
	        except urllib2.URLError:
	            self.send_error(502)
	            return

	        self.send_response(200, 'OK')
	        self.send_header('Content-type', 'text/html; charset=utf8')
	        self.end_headers()
	        # Serialize explicitly to UTF-8 bytes, matching the charset
	        # announced in the header above.
	        self.wfile.write(res.encode('utf-8'))


	def main():
	    """Start the proxy on 127.0.0.1 and open it in a browser tab.

	    The first command-line argument, when present, is parsed with
	    ``int`` and replaces the module-level ``port``.
	    """
	    # ``port`` is module state because interpret_data() reads it as well
	    # when it rewrites links; the original author flagged this as a known
	    # bad practice.
	    global port
	    if sys.argv[1:]:
	        port = int(sys.argv[1])

	    # Bind before opening the browser: once the socket is listening the
	    # browser's first request waits for serve_forever() instead of being
	    # refused, and no tab is opened when the port cannot be bound.
	    httpd = SocketServer.TCPServer(('127.0.0.1', port), MyHandler)
	    try:
	        webbrowser.open_new_tab('http://localhost:%s' % port)
	        httpd.serve_forever()
	    finally:
	        # Release the listening socket however the loop ends.
	        httpd.server_close()


	# Start the proxy only when this file is executed as a script.
	if __name__ == '__main__':
	    main()
No results found