Skip to content

Instantly share code, notes, and snippets.

@EthanRosenthal
Last active May 15, 2019 07:59
Show Gist options
  • Save EthanRosenthal/47c4c3bc25845524f182 to your computer and use it in GitHub Desktop.
Save EthanRosenthal/47c4c3bc25845524f182 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1|Toy Story (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)|0|0|0|1|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0\r\n",
"2|GoldenEye (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?GoldenEye%20(1995)|0|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|0\r\n",
"3|Four Rooms (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Four%20Rooms%20(1995)|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|0\r\n",
"4|Get Shorty (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Get%20Shorty%20(1995)|0|1|0|0|0|1|0|0|1|0|0|0|0|0|0|0|0|0|0\r\n",
"5|Copycat (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Copycat%20(1995)|0|0|0|0|0|0|1|0|1|0|0|0|0|0|0|0|1|0|0\r\n"
]
}
],
"source": [
"!head -5 u.item"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tt0114709\n"
]
}
],
"source": [
"import requests\n",
"import json\n",
"\n",
"# Follow the legacy title-exact link; the request ends up on a url whose\n",
"# second-to-last path segment is the IMDB id.\n",
"response = requests.get('http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)')\n",
"# Single-argument print(...) behaves the same on Python 2 and Python 3.\n",
"print(response.url.split('/')[-2])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Set up access to themoviedb.org's API.\n",
"\n",
"# Ask the configuration endpoint for the image base url and append 'w185',\n",
"# which corresponds to the size of movie poster to request.\n",
"api_key = 'INSERT API KEY HERE'\n",
"headers = {'Accept': 'application/json'}\n",
"payload = {'api_key': api_key}\n",
"response = requests.get(\n",
"    \"http://api.themoviedb.org/3/configuration\",\n",
"    params=payload,\n",
"    headers=headers,\n",
")\n",
"response = json.loads(response.text)\n",
"base_url = response['images']['base_url'] + 'w185'\n",
"\n",
"def _first_poster_path(movie_id, api_key):\n",
"    \"\"\"Return the file path of the first poster themoviedb.org lists for movie_id.\n",
"\n",
"    Raises KeyError, IndexError, TypeError or ValueError when the response\n",
"    is not JSON or does not contain at least one poster.\n",
"    \"\"\"\n",
"    movie_url = 'http://api.themoviedb.org/3/movie/{:}/images'.format(movie_id)\n",
"    response = requests.get(movie_url,\n",
"                            params={'api_key': api_key},\n",
"                            headers={'Accept': 'application/json'})\n",
"    return json.loads(response.text)['posters'][0]['file_path']\n",
"\n",
"\n",
"def get_poster(imdb_url, base_url, api_key):\n",
"    \"\"\"Return the full poster image url for a movie, or '' if none is found.\n",
"\n",
"    imdb_url -- IMDB link of the form '...title-exact?Title%20(Year)'.\n",
"    base_url -- image base url including the size suffix (e.g. '.../w185').\n",
"    api_key  -- themoviedb.org API key.\n",
"\n",
"    The IMDB id taken from the url the IMDB request ends up on is tried\n",
"    first; if themoviedb.org has no poster for it, the movie is looked up\n",
"    by title. Network errors from the IMDB request, the first poster\n",
"    lookup and the title search are not caught here.\n",
"    \"\"\"\n",
"    try:\n",
"        from urllib.parse import unquote  # Python 3\n",
"    except ImportError:\n",
"        from urllib import unquote  # Python 2\n",
"\n",
"    # Failures that mean \"no usable poster\". Listed explicitly because a\n",
"    # bare except would also swallow KeyboardInterrupt and SystemExit.\n",
"    lookup_errors = (KeyError, IndexError, TypeError, ValueError)\n",
"\n",
"    if not imdb_url:\n",
"        # Nothing to request; report it like any other missing poster.\n",
"        return ''\n",
"\n",
"    # Get IMDB movie ID from the url the request ended up on.\n",
"    response = requests.get(imdb_url)\n",
"    movie_id = response.url.split('/')[-2]\n",
"\n",
"    try:\n",
"        file_path = _first_poster_path(movie_id, api_key)\n",
"    except lookup_errors:\n",
"        # IMDB movie ID is sometimes no good. Search by title instead.\n",
"        # The title in the url is percent-encoded; decode it first, because\n",
"        # requests encodes params itself and would otherwise send a literal\n",
"        # 'Toy%20Story%20' as the search text.\n",
"        movie_title = unquote(imdb_url.split('?')[-1].split('(')[0]).strip()\n",
"        response = requests.get('http://api.themoviedb.org/3/search/movie',\n",
"                                params={'api_key': api_key,\n",
"                                        'query': movie_title},\n",
"                                headers={'Accept': 'application/json'})\n",
"        try:\n",
"            movie_id = json.loads(response.text)['results'][0]['id']\n",
"            file_path = _first_poster_path(movie_id, api_key)\n",
"        except lookup_errors + (requests.exceptions.RequestException,):\n",
"            # Sometimes the url just doesn't work.\n",
"            # Return '' so that it does not mess up Image()\n",
"            return ''\n",
"\n",
"    return base_url + file_path"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<img src=\"http://image.tmdb.org/t/p/w185/uMZqKhT4YA6mqo2yczoznv7IDmv.jpg\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from IPython.display import Image\n",
"from IPython.display import display\n",
"\n",
"# Same IMDB link as the first row of u.item; used to try out get_poster.\n",
"toy_story = 'http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)'\n",
"# Keep the Image as the cell's final expression so the notebook renders it.\n",
"Image(\n",
"    url=get_poster(toy_story, base_url, api_key)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from IPython.display import HTML\n",
"\n",
"def display_top_k_movies(similarity, mapper, movie_idx, base_url, api_key, k=5):\n",
"    \"\"\"Display the posters of the k movies most similar to movie_idx.\n",
"\n",
"    similarity -- array indexed as similarity[movie_idx, :].\n",
"    mapper     -- maps a movie index to the IMDB url handed to get_poster.\n",
"    movie_idx  -- index of the input movie.\n",
"    base_url, api_key -- forwarded to get_poster.\n",
"    k          -- number of posters to show (default 5).\n",
"\n",
"    Movies whose poster cannot be found are skipped. Fewer than k posters\n",
"    are shown if the candidates run out first.\n",
"    \"\"\"\n",
"    # Highest similarity first.\n",
"    movie_indices = np.argsort(similarity[movie_idx, :])[::-1]\n",
"    img_template = (\"<img style='width: 120px; margin: 0px; \"\n",
"                    \" float: left; border: 1px solid black;' src='%s' />\")\n",
"    images = []\n",
"    # Skip position 0 to not grab the input movie (this assumes a movie is\n",
"    # most similar to itself).\n",
"    for idx in movie_indices[1:]:\n",
"        # Honour k instead of a hard-coded 5, and stop cleanly when the\n",
"        # candidates are exhausted rather than indexing past the end.\n",
"        if len(images) >= k:\n",
"            break\n",
"        poster = get_poster(mapper[idx], base_url, api_key)\n",
"        if poster != '':\n",
"            images.append(img_template % poster)\n",
"    display(HTML(''.join(images)))\n",
"\n",
"def compare_recs(als_similarity, sgd_similarity, mapper,\n",
"                 movie_idx, base_url, api_key, k=5):\n",
"    \"\"\"Show the input movie's poster followed by ALS and SGD recommendations.\n",
"\n",
"    als_similarity, sgd_similarity -- similarity data forwarded to\n",
"        display_top_k_movies, one per model.\n",
"    mapper    -- maps a movie index to the IMDB url handed to get_poster.\n",
"    movie_idx -- index of the input movie.\n",
"    base_url, api_key -- forwarded to get_poster.\n",
"    k         -- number of recommendations to show per model (default 5).\n",
"    \"\"\"\n",
"    img_template = (\"<img style='width: 120px; margin: 0px; \"\n",
"                    \" float: left; border: 1px solid black;' src='%s' />\")\n",
"    # Display input\n",
"    display(HTML('<font size=5>' + 'Input' + '</font>'))\n",
"    input_poster = get_poster(mapper[movie_idx], base_url, api_key)\n",
"    display(HTML(img_template % input_poster))\n",
"    # Display each model's recs. Use the mapper and k given by the caller;\n",
"    # relying on a notebook global for the mapper breaks as soon as the\n",
"    # function is called with a different mapping.\n",
"    for title, similarity in (('ALS Recs', als_similarity),\n",
"                              ('SGD Recs', sgd_similarity)):\n",
"        display(HTML('<font size=5>' + title + '</font>'))\n",
"        display_top_k_movies(similarity, mapper,\n",
"                             movie_idx, base_url, api_key, k)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment