Last active
May 15, 2019 07:59
-
-
Save EthanRosenthal/47c4c3bc25845524f182 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1|Toy Story (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)|0|0|0|1|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0\r\n", | |
"2|GoldenEye (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?GoldenEye%20(1995)|0|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|0\r\n", | |
"3|Four Rooms (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Four%20Rooms%20(1995)|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|0\r\n", | |
"4|Get Shorty (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Get%20Shorty%20(1995)|0|1|0|0|0|1|0|0|1|0|0|0|0|0|0|0|0|0|0\r\n", | |
"5|Copycat (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Copycat%20(1995)|0|0|0|0|0|0|1|0|1|0|0|0|0|0|0|0|1|0|0\r\n" | |
] | |
} | |
], | |
"source": [ | |
"!head -5 u.item" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"tt0114709\n" | |
] | |
} | |
], | |
"source": [ | |
"import requests\n", | |
"import json\n", | |
"\n", | |
"response = requests.get('http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)')\n", | |
"print response.url.split('/')[-2]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Build function to query themoviedb.org's API\n", | |
"\n", | |
"# Get base url filepath structure. w185 corresponds to size of movie poster.\n", | |
"api_key = 'INSERT API KEY HERE'\n", | |
"headers = {'Accept': 'application/json'}\n", | |
"payload = {'api_key': api_key} \n", | |
"response = requests.get(\"http://api.themoviedb.org/3/configuration\",\\\n", | |
" params=payload,\\\n", | |
" headers=headers)\n", | |
"response = json.loads(response.text)\n", | |
"base_url = response['images']['base_url'] + 'w185'\n", | |
"\n", | |
"def get_poster(imdb_url, base_url, api_key):\n", | |
" # Get IMDB movie ID\n", | |
" response = requests.get(imdb_url)\n", | |
" movie_id = response.url.split('/')[-2]\n", | |
" \n", | |
" # Query themoviedb.org API for movie poster path.\n", | |
" movie_url = 'http://api.themoviedb.org/3/movie/{:}/images'.format(movie_id)\n", | |
" headers = {'Accept': 'application/json'}\n", | |
" payload = {'api_key': api_key} \n", | |
" response = requests.get(movie_url, params=payload, headers=headers)\n", | |
" try:\n", | |
" file_path = json.loads(response.text)['posters'][0]['file_path']\n", | |
" except:\n", | |
" # IMDB movie ID is sometimes no good. Need to get correct one.\n", | |
" movie_title = imdb_url.split('?')[-1].split('(')[0]\n", | |
" payload['query'] = movie_title\n", | |
" response = requests.get('http://api.themoviedb.org/3/search/movie',\\\n", | |
" params=payload,\\\n", | |
" headers=headers)\n", | |
" try:\n", | |
" movie_id = json.loads(response.text)['results'][0]['id']\n", | |
" payload.pop('query', None)\n", | |
" movie_url = 'http://api.themoviedb.org/3/movie/{:}/images'\\\n", | |
" .format(movie_id)\n", | |
" response = requests.get(movie_url, params=payload, headers=headers)\n", | |
" file_path = json.loads(response.text)['posters'][0]['file_path']\n", | |
" except:\n", | |
" # Sometimes the url just doesn't work.\n", | |
" # Return '' so that it does not mess up Image()\n", | |
" return ''\n", | |
" \n", | |
" return base_url + file_path" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"http://image.tmdb.org/t/p/w185/uMZqKhT4YA6mqo2yczoznv7IDmv.jpg\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from IPython.display import Image\n", | |
"from IPython.display import display\n", | |
"\n", | |
"# Smoke-test get_poster on a single title and render the poster inline.\n", | |
"toy_story = 'http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)'\n", | |
"Image(url=get_poster(toy_story, base_url, api_key))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from IPython.display import HTML\n", | |
"\n", | |
"def display_top_k_movies(similarity, mapper, movie_idx, base_url, api_key, k=5):\n", | |
" movie_indices = np.argsort(similarity[movie_idx,:])[::-1]\n", | |
" images = ''\n", | |
" k_ctr = 0\n", | |
" # Start i at 1 to not grab the input movie\n", | |
" i = 1\n", | |
" while k_ctr < 5:\n", | |
" movie = mapper[movie_indices[i]]\n", | |
" poster = get_poster(movie, base_url, api_key)\n", | |
" if poster != '':\n", | |
" images += \"<img style='width: 120px; margin: 0px; \\\n", | |
" float: left; border: 1px solid black;' src='%s' />\"\\\n", | |
" % poster\n", | |
" k_ctr += 1\n", | |
" i += 1\n", | |
" display(HTML(images))\n", | |
"\n", | |
"def compare_recs(als_similarity, sgd_similarity, mapper,\\\n", | |
" movie_idx, base_url, api_key, k=5):\n", | |
" # Display input\n", | |
" display(HTML('<font size=5>'+'Input'+'</font>'))\n", | |
" input_poster = get_poster(mapper[movie_idx], base_url, api_key)\n", | |
" input_image = \"<img style='width: 120px; margin: 0px; \\\n", | |
" float: left; border: 1px solid black;' src='%s' />\" \\\n", | |
" % input_poster\n", | |
" display(HTML(input_image))\n", | |
" # Display ALS Recs\n", | |
" display(HTML('<font size=5>'+'ALS Recs'+'</font>'))\n", | |
" display_top_k_movies(als_similarity, idx_to_movie,\\\n", | |
" movie_idx, base_url, api_key)\n", | |
" # Display SGD Recs\n", | |
" display(HTML('<font size=5>'+'SGD Recs'+'</font>'))\n", | |
" display_top_k_movies(sgd_similarity, idx_to_movie,\\\n", | |
" movie_idx, base_url, api_key)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment