Last active
May 18, 2023 19:21
-
-
Save abhididdigi/6146857 to your computer and use it in GitHub Desktop.
Calls the IMDB search API to get movie information. When there is an exact match, IMDB returns the movie's HTML page instead of JSON, so that page is scraped to get the details.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import httplib2 | |
import urllib | |
from bs4 import BeautifulSoup | |
import ast | |
import json | |
import re | |
class IMDBCall:
    """Look up a movie on IMDB and report its name and genre tags as JSON.

    A search either yields a JSON document listing candidate titles, in
    which case a second service is queried for the genres, or (when IMDB
    answers with something that is not JSON) the movie's own HTML page,
    which is scraped directly.
    """

    # Keys of the IMDB search response that may hold the best match,
    # in the order they are tried.
    _TITLE_KEYS = ('title_popular', 'title_exact')

    def writeLog(self, title):
        """Append *title* to ``error.json`` in the current directory.

        Used to record lookups that could not be completed.  The text is
        written as-is, with no separator between entries.  Byte strings
        (as returned by the HTTP client on Python 3) are decoded first so
        they can be written to the text-mode file.
        """
        if isinstance(title, bytes) and not isinstance(title, str):
            title = title.decode('utf-8', 'replace')
        with open('error.json', "a") as log_file:
            log_file.write(title)

    @staticmethod
    def _quote(text):
        """Percent-encode *text* for use in a URL on Python 2 or 3."""
        try:
            from urllib.parse import quote  # Python 3
        except ImportError:
            from urllib import quote  # Python 2
            if not isinstance(text, str):
                # Python 2's quote() cannot handle non-ASCII unicode.
                text = text.encode('utf-8')
        return quote(text)

    @classmethod
    def _pickTitle(cls, results):
        """Return the first usable title in a search response, or None."""
        if not isinstance(results, dict):
            return None
        for key in cls._TITLE_KEYS:
            try:
                return results[key][0]['title']
            except (KeyError, IndexError, TypeError):
                # Missing key, empty list or malformed entry: try the next.
                continue
        return None

    @staticmethod
    def _genreText(node):
        """Return the text of *node*'s span (or of *node* itself), or None."""
        span = node if node.name == 'span' else node.find('span')
        return None if span is None else span.get_text()

    def _fetchTags(self, http, actualTitle):
        """Query the genre service for *actualTitle* and build the result."""
        # Apostrophes are stripped before encoding the title for the query.
        title = self._quote(actualTitle.replace("'", ""))
        url = ("http://mymovieapi.com/?title=" + title +
               "&type=json&plot=simple&episode=1&limit=1&yg=0&mt=none"
               "&lang=en-US&offset=&aka=simple&release=simple&business=0&tech=0")
        try:
            resp, content = http.request(url)
        except Exception:
            # Best effort: a failed lookup is logged, not raised.
            print("Error")
            self.writeLog(url)
            return None

        try:
            genres = json.loads(content)[0]['genres']
        except ValueError:
            # The service did not answer with valid JSON.
            print("Error")
            self.writeLog(content)
            return None
        except (KeyError, IndexError, TypeError):
            print("Did nothing, as there is no genre %s" % content)
            self.writeLog(content)
            return None

        movieDict = dict()
        movieDict['tags'] = genres
        movieDict['name'] = actualTitle
        movieDict['ready'] = 'true'
        return json.dumps(movieDict)

    def _scrapeMoviePage(self, content):
        """Extract name and genres from a movie's HTML page."""
        print('inside')
        try:
            soup = BeautifulSoup(content, "html.parser")
            tags = []
            for node in soup.find_all(itemprop="genre"):
                text = self._genreText(node)
                # Skip nodes without a span and the '|' separators.
                if text is not None and text != '|':
                    tags.append(text)
            movieDict = dict()
            movieDict['name'] = soup.find_all(itemprop="name")[0].get_text()
            movieDict['ready'] = 'true'
            movieDict['tags'] = tags
        except Exception:
            # Best effort: an unparseable page is logged, not raised.
            self.writeLog(content)
            return None
        return json.dumps(movieDict)

    def getResults(self, name):
        """Search IMDB for *name* and return the movie details.

        Returns a JSON string with the keys ``name``, ``tags`` and
        ``ready``, or None when the movie could not be resolved; in that
        case the offending response is appended to the error log.
        Errors raised by the initial search request are not caught.
        """
        print("creating a request for %s" % name)
        h = httplib2.Http(".cache")
        # First make the call to IMDB and try getting the JSON.
        resp, content = h.request(
            "http://www.imdb.com/xml/find?json=1&q=" + self._quote(name) + "&s=all",
            "GET")
        try:
            results = json.loads(content)
        except ValueError:
            # Not JSON: treat the response as the movie's own HTML page,
            # which already contains the genres.
            return self._scrapeMoviePage(content)

        actualTitle = self._pickTitle(results)
        if actualTitle is None:
            print("No title_popular or title_exact found. Hence returning.")
            self.writeLog(content)
            return None
        # The search result carries no genres, so ask the second service.
        return self._fetchTags(h, actualTitle)
# Sample call: look up one title and print what getResults returns.
print(IMDBCall().getResults('8MM'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment