Created
January 31, 2017 21:33
-
-
Save JKirchartz/80ad6ec90d44b58486db89058d2fdb37 to your computer and use it in GitHub Desktop.
Download all quotes from GoodReads by author's quote URL, print in fortune format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# vim:fenc=utf-8 | |
# | |
# Copyleft (ↄ) 2016 jkirchartz <[email protected]> | |
# | |
# Distributed under terms of the NPL (Necessary Public License) license. | |
""" | |
Download all quotes from GoodReads by author's quote URL, print in fortune format | |
usage: | |
python goodreadsquotes.py https://www.goodreads.com/author/quotes/1791.Seth_Godin > godin | |
""" | |
from pyquery import PyQuery | |
import sys, random, re, time | |
# Matches the trailing "<numeric id>.<Author_Name>" segment of an author quote
# URL and captures the name part. Raw string so the regex escapes (\d, \., \w)
# are not treated as (invalid) string escapes.
AUTHOR_REX = re.compile(r'\d+\.(\w+)$')
def grabber(base_url, i=1):
    """Print all quotes from an author's GoodReads quote pages, fortune style.

    Starting at page ``i``, loads ``base_url + "?page=" + str(i)`` with
    PyQuery, prints the text of every ``.quoteText`` element followed by a
    line containing only ``%``, then moves on to the next page until the
    pager reports there is none.

    Written to run unchanged on both Python 2 and Python 3.

    Args:
        base_url: Author quote URL, e.g.
            ``https://www.goodreads.com/author/quotes/1791.Seth_Godin``.
        i: Number of the first page to fetch.
    """
    # The author name is taken from the URL tail ("1791.Seth_Godin").
    auth_match = AUTHOR_REX.search(base_url)
    author = auth_match.group(1).replace('_', ' ') if auth_match else None

    # Iterate instead of recursing once per page, so long author listings
    # cannot exhaust the recursion limit.
    while True:
        url = base_url + "?page=" + str(i)
        page = PyQuery(url)

        for quote in page(".quoteText").items():
            # Remove embedded <script> elements, then drop non-ASCII
            # characters while keeping a text (not bytes) value so that
            # replace() and print() behave the same on Python 2 and 3.
            text = quote.remove('script').text()
            text = text.encode('ascii', 'ignore').decode('ascii')
            if author:
                text = text.replace(author, " -- " + author)
            print(text)
            print('%')

        next_link = page('.next_page')
        # hasClass() is False on an empty selection, so a page without any
        # ".next_page" element must be treated as the last page explicitly;
        # otherwise the loop would request pages forever.
        if len(next_link) == 0 or next_link.hasClass('disabled'):
            break

        # Pause between page requests.
        time.sleep(10)
        i += 1
if __name__ == "__main__":
    # Without a URL argument grabber() would be called with an empty string
    # and fail on a meaningless "?page=1" request; report usage instead.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: python goodreadsquotes.py <author-quotes-url>\n")
        sys.exit(2)
    # All remaining arguments are concatenated into a single URL.
    grabber(''.join(sys.argv[1:]))
Just add parentheses around quote. Python 3 requires parentheses for printing; the code above was probably written for Python 2.
print(quote)
print('%')
Here's a version patched up for python3.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
# https://gist.github.com/JKirchartz/80ad6ec90d44b58486db89058d2fdb37
#
# Copyleft (ↄ) 2016 jkirchartz <[email protected]>
#
# Distributed under terms of the NPL (Necessary Public License) license.
"""
Download all quotes from GoodReads by author's quote URL, print in fortune format
usage:
python goodreadsquotes.py https://www.goodreads.com/author/quotes/1791.Seth_Godin > godin
"""
from pyquery import PyQuery
import sys, random, re, time
# Matches the trailing "<numeric id>.<Author_Name>" segment of an author quote
# URL and captures the name part. Raw string so the regex escapes (\d, \., \w)
# are not treated as (invalid) string escapes.
AUTHOR_REX = re.compile(r'\d+\.(\w+)$')
def grabber(base_url, i=1):
    """Print all quotes from an author's GoodReads quote pages, fortune style.

    Starting at page ``i``, loads ``"{base_url}?page={i}"`` with PyQuery,
    prints the text of every ``.quoteText`` element followed by a line
    containing only ``%``, then moves on to the next page until the pager
    reports there is none.

    Args:
        base_url: Author quote URL, e.g.
            ``https://www.goodreads.com/author/quotes/1791.Seth_Godin``.
        i: Number of the first page to fetch.
    """
    # The author name is taken from the URL tail ("1791.Seth_Godin").
    auth_match = AUTHOR_REX.search(base_url)
    author = auth_match.group(1).replace('_', ' ') if auth_match else None

    # Iterate instead of recursing once per page, so long author listings
    # cannot exhaust the recursion limit.
    while True:
        page = PyQuery("{}?page={}".format(base_url, i))

        for quote in page(".quoteText").items():
            # Remove embedded <script> elements, then drop non-ASCII
            # characters. Always decode back to str: printing the encoded
            # bytes directly would emit a b'...' repr when no author name
            # could be extracted from the URL.
            text = quote.remove('script').text()
            text = text.encode('ascii', 'ignore').decode('ascii')
            if author:
                text = text.replace(author, " -- " + author)
            print(text)
            print('%')

        next_link = page('.next_page')
        # hasClass() is False on an empty selection, so a page without any
        # ".next_page" element must be treated as the last page explicitly;
        # otherwise the loop would request pages forever.
        if len(next_link) == 0 or next_link.hasClass('disabled'):
            break

        # Pause between page requests.
        time.sleep(10)
        i += 1
if __name__ == "__main__":
    # Without a URL argument grabber() would be called with an empty string
    # and fail on a meaningless "?page=1" request; report usage instead.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: python goodreadsquotes.py <author-quotes-url>\n")
        sys.exit(2)
    # All remaining arguments are concatenated into a single URL.
    grabber(''.join(sys.argv[1:]))
Doesn't seem to be working for me, it just gets stuck without ever retrieving a quote.
I rewrote this for modern python:
https://gist.github.com/C0rn3j/1bc48d933068da0fdba4089ac9f783ff
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I don't know Python, so I currently can't fix the problem. There is an error: SyntaxError: Missing parentheses in call to 'print'. Did you mean print(print quote)? (line 35)