Last active
August 26, 2016 20:12
-
-
Save hex128/0dc2d2019d63ce596e85 to your computer and use it in GitHub Desktop.
Google Play Movie Parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2 | |
# -*- coding: utf-8 -*- | |
from json import dumps | |
from sys import stdout, exit | |
from codecs import getwriter | |
from signal import signal, SIGINT | |
from urllib import urlopen | |
from bs4 import BeautifulSoup | |
def _person(tag):
    """Return the ``{"name", "url"}`` record for one person element.

    *tag* is an element that contains a ``<span itemprop="name">`` and an
    ``<a itemprop="url">``; the name is whitespace-stripped text and the
    url is the anchor's ``href`` attribute.
    """
    return {
        "name": tag.find("span", {"itemprop": "name"}).text.strip(),
        "url": tag.find("a", {"itemprop": "url"})["href"],
    }


def _people(container, itemprop):
    """Return a person record for every ``<span>`` under *container* whose
    ``itemprop`` attribute equals *itemprop*."""
    return [_person(tag) for tag in container.find_all("span", {"itemprop": itemprop})]


def parse(html):
    """Extract movie metadata from the HTML of a movie details page.

    :param html: markup of the page, passed straight to BeautifulSoup
        (parsed with the ``lxml`` parser).
    :returns: dict with the keys ``url``, ``name``, ``published``,
        ``genre``, ``offers``, ``rating``, ``rating-count``, ``trailer``,
        ``description``, ``actors``, ``producers``, ``director`` and
        ``authors``.  ``trailer`` and ``director`` are ``None`` when the
        page has no matching element; ``offers`` is an empty list when
        there is no price button.
    :raises AttributeError, TypeError: when any other expected element is
        missing, because the lookup result ``None`` is dereferenced.
    """
    soup = BeautifulSoup(html, "lxml")
    result = {
        "url": soup.find("meta", {"itemprop": "url"})["content"],
        "name": soup.find("div", {"itemprop": "name"}).text.strip(),
        "published": soup.find("div", {"itemprop": "datePublished"}).text.strip(),
        "genre": soup.find("span", {"itemprop": "genre"}).text.strip(),
        "offers": [],
    }

    # Look the price button up once instead of searching the tree twice.
    price_button = soup.find("button", {"class": "price"})
    if price_button:
        for offer in price_button.find_all("span", {"itemprop": "offers"}):
            result["offers"].append({
                "description": offer.find("meta", {"itemprop": "description"})["content"],
                "price": offer.find("meta", {"itemprop": "price"})["content"],
            })

    result["rating"] = soup.find("meta", {"itemprop": "ratingValue"})["content"]
    result["rating-count"] = soup.find("meta", {"itemprop": "ratingCount"})["content"]

    trailer = soup.find("span", {"class": "details-trailer"})
    if trailer:
        result["trailer"] = trailer.find("span", {"class": "preview-overlay-container"})["data-video-url"]
    else:
        result["trailer"] = None

    # The description is kept as serialized HTML of the whole section body.
    result["description"] = unicode(soup.find("div", {"class": "details-section-body"}))

    details = soup.find("div", {"class": "cc-contents"})
    result["actors"] = _people(details, "actor")
    result["producers"] = _people(details, "producer")

    # A page without a director element yields None rather than crashing,
    # mirroring how a missing trailer is reported.
    director = details.find("span", {"itemprop": "director"})
    result["director"] = _person(director) if director else None

    # Fixed: this previously queried itemprop="actor", so "authors" was
    # always a copy of "actors".
    result["authors"] = _people(details, "author")
    return result
def main(url="https://play.google.com/store/movies/details?id=P_lCpSna7mY&hl=en"):
    """Fetch a movie details page, parse it and print the result as JSON.

    :param url: address of the page to download; defaults to the movie
        the script was originally hard-wired to.

    The parsed dict is written to stdout as UTF-8 encoded, key-sorted,
    2-space indented JSON followed by a newline.
    """
    # Imported locally so the block does not depend on the file header.
    from contextlib import closing

    sout = getwriter("utf8")(stdout)
    # closing() guarantees the HTTP response is released even if read() fails;
    # the original left the response object open.
    with closing(urlopen(url)) as response:
        html = response.read()
    data = parse(html)
    sout.write(dumps(data, ensure_ascii=False, sort_keys=True, indent=2, separators=(',', ': ')) + "\n")
if __name__ == "__main__":
    # Exit with status 0 on Ctrl-C instead of printing a KeyboardInterrupt
    # traceback; the handler ignores both arguments it receives.
    signal(SIGINT, lambda signum, frame: exit(0))
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"actors": [ | |
{ | |
"name": "Benedict Cumberbatch", | |
"url": "/store/search?q=Benedict+Cumberbatch&c=movies" | |
}, | |
{ | |
"name": "Keira Knightley", | |
"url": "/store/search?q=Keira+Knightley&c=movies" | |
}, | |
{ | |
"name": "Matthew Goode", | |
"url": "/store/search?q=Matthew+Goode&c=movies" | |
}, | |
{ | |
"name": "Rory Kinnear", | |
"url": "/store/search?q=Rory+Kinnear&c=movies" | |
}, | |
{ | |
"name": "Allen Leech", | |
"url": "/store/search?q=Allen+Leech&c=movies" | |
}, | |
{ | |
"name": "Matthew Beard", | |
"url": "/store/search?q=Matthew+Beard&c=movies" | |
}, | |
{ | |
"name": "Charles Dance", | |
"url": "/store/search?q=Charles+Dance&c=movies" | |
}, | |
{ | |
"name": "Mark Strong", | |
"url": "/store/search?q=Mark+Strong&c=movies" | |
} | |
], | |
"authors": [ | |
{ | |
"name": "Benedict Cumberbatch", | |
"url": "/store/search?q=Benedict+Cumberbatch&c=movies" | |
}, | |
{ | |
"name": "Keira Knightley", | |
"url": "/store/search?q=Keira+Knightley&c=movies" | |
}, | |
{ | |
"name": "Matthew Goode", | |
"url": "/store/search?q=Matthew+Goode&c=movies" | |
}, | |
{ | |
"name": "Rory Kinnear", | |
"url": "/store/search?q=Rory+Kinnear&c=movies" | |
}, | |
{ | |
"name": "Allen Leech", | |
"url": "/store/search?q=Allen+Leech&c=movies" | |
}, | |
{ | |
"name": "Matthew Beard", | |
"url": "/store/search?q=Matthew+Beard&c=movies" | |
}, | |
{ | |
"name": "Charles Dance", | |
"url": "/store/search?q=Charles+Dance&c=movies" | |
}, | |
{ | |
"name": "Mark Strong", | |
"url": "/store/search?q=Mark+Strong&c=movies" | |
} | |
], | |
"description": "<div class=\"details-section-body expandable\"> <div class=\"full-text multicol\" data-multicol-fixed-height=\"true\" data-multicol-text=\"true\"> <span class=\"details-trailer\"> <span class=\"video-image-wrapper\"> <img class=\"video-image\" src=\"https://lh3.googleusercontent.com/LIbDUFQL85Yn6IOp-l4H93chch6q58HnjdCmbYcd510x4thv4oehJAbETMzj43iVYETc=w315\"/> </span> <span class=\"preview-overlay-container\" data-docid=\"movie-P_lCpSna7mY\" data-video-url=\"https://www.youtube.com/embed/Agd89L0CvO8?ps=play&vq=large&rel=0&autohide=1&showinfo=0&autoplay=1\"> <span class=\"play-action-container\" data-video-url=\"https://www.youtube.com/embed/Agd89L0CvO8?ps=play&vq=large&rel=0&autohide=1&showinfo=0&autoplay=1\"> <span class=\"play-action\"></span> </span> </span> </span> Academy Award®-Winner for Best Adapted Screenplay. Academy Award®-nominee Benedict Cumberbatch (TV's SHERLOCK, STAR TREK INTO DARKNESS) shines as real-life war hero and pioneer of modern-day computing, Alan Turing. THE IMITATION GAME follows Turing as he leads a motley crew of scholars, linguists, chess champions, and intelligence officers in cracking the so-called unbreakable codes of Germany's World War II Enigma machine, potentially saving millions of lives by helping to shorten the war. Also depicted is Turing's tragic fall from grace when he was convicted of homosexuality - a crime in post-war Britain. Co-starring Academy Award®-nominee Keira Knightley of BEGIN AGAIN and PIRATES OF THE CARIBBEAN FRANCHISE. </div> </div>", | |
"director": { | |
"name": "Morten Tyldum", | |
"url": "/store/search?q=Morten+Tyldum&c=movies" | |
}, | |
"genre": "Drama", | |
"name": "The Imitation Game", | |
"offers": [], | |
"producers": [ | |
{ | |
"name": "Nora Grossman", | |
"url": "/store/search?q=Nora+Grossman&c=movies" | |
}, | |
{ | |
"name": "Ido Ostrowsky", | |
"url": "/store/search?q=Ido+Ostrowsky&c=movies" | |
}, | |
{ | |
"name": "Teddy Schwarzman", | |
"url": "/store/search?q=Teddy+Schwarzman&c=movies" | |
}, | |
{ | |
"name": "Peter Heslop", | |
"url": "/store/search?q=Peter+Heslop&c=movies" | |
}, | |
{ | |
"name": "Graham Moore", | |
"url": "/store/search?q=Graham+Moore&c=movies" | |
} | |
], | |
"published": "February 2015", | |
"rating": "4.300000190734863", | |
"rating-count": "170", | |
"trailer": "https://www.youtube.com/embed/Agd89L0CvO8?ps=play&vq=large&rel=0&autohide=1&showinfo=0&autoplay=1", | |
"url": "https://play.google.com/store/movies/details?id=P_lCpSna7mY" | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment