Skip to content

Instantly share code, notes, and snippets.

@hex128
Last active August 26, 2016 20:14
Show Gist options
  • Save hex128/be10f25645b4524207d6 to your computer and use it in GitHub Desktop.
Save hex128/be10f25645b4524207d6 to your computer and use it in GitHub Desktop.
Google Play Books Parser
#!/usr/bin/python2
# -*- coding: utf-8 -*-
from json import dumps
from sys import stdout, exit
from codecs import getwriter
from signal import signal, SIGINT
from urllib import urlopen
from bs4 import BeautifulSoup
def _first(scope, tag, attrs):
    """Return the first ``tag`` element under ``scope`` matching ``attrs``.

    Returns None when ``scope`` is None or when nothing matches, so that
    lookups can be chained without checking every intermediate result.
    """
    if scope is None:
        return None
    return scope.find(tag, attrs)


def _meta_content(scope, itemprop):
    """Return the ``content`` attribute of ``<meta itemprop=...>``, or None."""
    node = _first(scope, "meta", {"itemprop": itemprop})
    return None if node is None else node.get("content")


def _stripped_text(scope, tag, itemprop):
    """Return the stripped text of ``<tag itemprop=...>``, or None."""
    node = _first(scope, tag, {"itemprop": itemprop})
    return None if node is None else node.text.strip()


def parse(html):
    """Extract book metadata from the HTML of a Google Play Books page.

    The page is parsed with BeautifulSoup (``lxml`` backend) and the values
    are read from elements carrying ``itemprop`` attributes.

    :param html: markup of the book details page.
    :returns: dict with the keys ``url``, ``name``, ``author`` (a dict with
        ``name`` and ``url``), ``price``, ``preview``, ``rating``,
        ``rating-count``, ``description``, ``pages``, ``language``,
        ``isbn``, ``genres`` (a list of dicts with ``name`` and ``url``)
        and ``protection``. Values are strings exactly as found in the
        markup. A field whose element or attribute is absent from the page
        is reported as None instead of aborting the whole parse.
    """
    soup = BeautifulSoup(html, "lxml")
    author = _first(soup, "div", {"itemprop": "author"})
    offer = _first(soup, "span", {"itemprop": "offers"})
    description = _first(soup, "div", {"class": "details-section-body"})

    return {
        "url": _meta_content(soup, "url"),
        "name": _stripped_text(soup, "div", "name"),
        # Author name/url are looked up inside the author element only.
        "author": {
            "name": _meta_content(author, "name"),
            "url": _meta_content(author, "url"),
        },
        # Price and preview link are looked up inside the offer element only.
        "price": _meta_content(offer, "price"),
        "preview": _meta_content(offer, "previewUrl"),
        "rating": _meta_content(soup, "ratingValue"),
        "rating-count": _meta_content(soup, "ratingCount"),
        # The description keeps its HTML markup. Guard against a missing
        # element: unicode(None) would yield the literal string "None".
        "description": None if description is None else unicode(description),
        "pages": _stripped_text(soup, "a", "num_pages"),
        "language": _stripped_text(soup, "div", "language"),
        "isbn": _stripped_text(soup, "div", "isbn"),
        "genres": [
            {"name": genre.text.strip(), "url": genre.get("href")}
            for genre in soup.find_all("a", {"itemprop": "genre"})
        ],
        "protection": _stripped_text(soup, "div", "contentProtection"),
    }
# Page fetched when main() is called without an explicit URL.
_DEFAULT_BOOK_URL = (
    "https://play.google.com/store/books/details/"
    "Douglas_Adams_The_Ultimate_Hitchhiker_s_Guide_to_t"
    "?id=mO-62VxpLe0C&hl=en"
)


def main(url=_DEFAULT_BOOK_URL):
    """Download a book page, parse it and write the result to stdout as JSON.

    :param url: address of the book details page to fetch; defaults to the
        page this script was originally written against.

    The JSON is written UTF-8 encoded, with sorted keys and a two-space
    indent, followed by a newline.
    """
    response = urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    data = parse(html)

    # Wrap stdout so that non-ASCII text (ensure_ascii=False) is encoded
    # as UTF-8 on output.
    sout = getwriter("utf8")(stdout)
    sout.write(dumps(data, ensure_ascii=False, sort_keys=True, indent=2, separators=(',', ': ')) + "\n")
if __name__ == "__main__":
    # On SIGINT (Ctrl-C) leave immediately with exit status 0.
    signal(SIGINT, lambda signum, frame: exit(0))
    main()
{
"author": {
"name": "Douglas Adams",
"url": "/store/books/author?id=Douglas+Adams"
},
"description": "<div class=\"details-section-body expandable\"> <div class=\"full-text multicol\" data-multicol-fixed-height=\"true\" data-multicol-short-layout=\"true\" data-multicol-text=\"true\"> <b>At last in paperback in one complete volume, here are the five classic novels from Douglas Adams’s beloved Hitchiker series.<br/></b><br/><b><i>The Hitchhiker’s Guide to the Galaxy<br/></i></b>Seconds before the Earth is demolished for a galactic freeway, Arthur Dent is saved by Ford Prefect, a researcher for the revised <i>Guide</i>. Together they stick out their thumbs to the stars and begin a wild journey through time and space.<br/><br/><b><i>The Restaurant at the End of the Universe<br/></i></b>Facing annihilation at the hands of warmongers is a curious time to crave tea. It could only happen to the cosmically displaced Arthur Dent and his comrades as they hurtle across the galaxy in a desperate search for a place to eat.<br/><br/><b><i>Life, the Universe and Everything<br/></i></b>The unhappy inhabitants of planet Krikkit are sick of looking at the night sky– so they plan to destroy it. The universe, that is. Now only five individuals can avert Armageddon: mild-mannered Arthur Dent and his stalwart crew.<br/><br/><b><i>So Long, and Thanks for All the Fish<br/></i></b>Back on Earth, Arthur Dent is ready to believe that the past eight years were all just a figment of his stressed-out imagination. But a gift-wrapped fishbowl with a cryptic inscription conspires to thrust him back to reality. So to speak.<br/><br/><b><i>Mostly Harmless<br/></i></b>Just when Arthur Dent makes the terrible mistake of starting to enjoy life, all hell breaks loose. Can he save the Earth from total obliteration? Can he save the <i>Guide</i> from a hostile alien takeover? Can he save his daughter from herself?<br/><br/><br/><i>From the Trade Paperback edition.</i> </div> </div>",
"genres": [
{
"name": "Science Fiction & Fantasy",
"url": "/store/books/category/coll_1604"
},
{
"name": "Space Opera",
"url": "/store/books/category/subj_Science_Fiction___Fantasy.Space_Opera/collection/featured"
},
{
"name": "Fiction / Humorous",
"url": "/store/search?q=subject:%22Fiction+/+Humorous%22&c=books"
},
{
"name": "Fiction / Science Fiction / Action & Adventure",
"url": "/store/search?q=subject:%22Fiction+/+Science+Fiction+/+Action+%26+Adventure%22&c=books"
},
{
"name": "Fiction / Science Fiction / Space Opera",
"url": "/store/search?q=subject:%22Fiction+/+Science+Fiction+/+Space+Opera%22&c=books"
}
],
"isbn": "9780307498465",
"language": "English",
"name": "The Ultimate Hitchhiker's Guide to the Galaxy",
"pages": "832",
"preview": "https://play.google.com/books/reader?id=mO-62VxpLe0C&printsec=frontcover&output=reader",
"price": "UAH155.72",
"protection": "This content is DRM protected.",
"rating": "4.4338765144348145",
"rating-count": "2639",
"url": "https://play.google.com/store/books/details?id=mO-62VxpLe0C"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment