Created July 20, 2016 18:40
-
-
Save vadimostanin/44bf976549454bc88fec54f7fd8d7899 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/habr/user.py b/habr/user.py | |
index f6cda91..d641782 100644 | |
--- a/habr/user.py | |
+++ b/habr/user.py | |
@@ -186,7 +186,7 @@ class TMUser(object): | |
doc = html.document_fromstring(requests.get(url).text) | |
out = dict() | |
pages = get_pages(doc) | |
- favs = doc.xpath("//div[@class='user_favorites']//a[@class='post_title']") | |
+ favs = doc.xpath("//div[@class='user_favorites']//a[@class='post__title_link']") | |
for f in favs: | |
# out[f.text] = str(f.attrib['href']).split('/')[-2] | |
# topic_id = | |
@@ -196,7 +196,7 @@ class TMUser(object): | |
# if show_progress: | |
# print('parsing page{0}... url={1}'.format(p, url)) | |
doc = html.document_fromstring(requests.get(url).text) | |
- favs = doc.xpath("//div[@class='user_favorites']//a[@class='post_title']") | |
+ favs = doc.xpath("//div[@class='user_favorites']//a[@class='post__title_link']") | |
for f in favs: | |
# out[f.text] = f.attrib['href'][-7:-1] | |
out[f.text] = str(f.attrib['href']).split('/')[-2] | |
@@ -210,7 +210,7 @@ class TMUser(object): | |
doc = html.document_fromstring(req.text) | |
out = dict() | |
pages = get_pages(doc) | |
- posts = doc.xpath("//div[@class='posts_list']//a[@class='post_title']") | |
+ posts = doc.xpath("//div[@class='posts_list']//a[@class='post__title_link']") | |
for f in posts: | |
# print(f.text) | |
out[f.text] = str(f.attrib['href']).split('/')[-2] | |
@@ -221,7 +221,7 @@ class TMUser(object): | |
if req.status_code != 200: | |
raise IOError('doc not found. URL = {}'.format(url)) | |
doc = html.document_fromstring(req.text) | |
- posts = doc.xpath("//div[@class='posts_list']//a[@class='post_title']") | |
+ posts = doc.xpath("//div[@class='posts_list']//a[@class='post__title_link']") | |
for f in posts: | |
out[f.text] = str(f.attrib['href']).split('/')[-2] | |
return out | |
diff --git a/habraparse.py b/habraparse.py | |
index 8dcdb8d..087a7f4 100755 | |
--- a/habraparse.py | |
+++ b/habraparse.py | |
@@ -54,7 +54,7 @@ def prepare_html(topic, with_comments=False): | |
<head> | |
<meta http-equiv="content-type" content="text/html; charset=utf-8"> | |
<meta charset="UTF-8"> | |
- <link href="http://habrahabr.ru/styles/1412005750/assets/post_common_css.css" rel="stylesheet" media="all" /> | |
+ <link href="http://habracdn.net/habr/styles/1469028327/_build/global_main.css" rel="stylesheet" media="all" /> | |
<title>{title}</title> | |
</head> | |
<body> | |
@@ -138,7 +138,7 @@ def save_pdf(topic_id: int, filename: str, with_comments: bool = False, project: | |
ht = HabraTopic(topic_id) | |
html = prepare_html(ht, with_comments=with_comments) | |
- css = CSS(string='@page { size: A4; margin: 1cm !important}') | |
+ css = CSS(string='@page { size: A4 landscape; margin: 1cm !important}') | |
HTML(string=html).write_pdf(filename, stylesheets=[css]) | |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.