Last active
January 15, 2018 06:25
-
-
Save aa-gamJain/9eae9aecaf5a0f4d5c9a300a2a60a20d to your computer and use it in GitHub Desktop.
Coinioc Task 1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Importing all Libraries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"from bs4 import BeautifulSoup\n", | |
"\n", | |
"from IPython.display import Image, display\n", | |
"from IPython.core.display import HTML \n", | |
"\n", | |
"pd.set_option('display.max_rows',100)\n", | |
"from IPython.core.interactiveshell import InteractiveShell\n", | |
"InteractiveShell.ast_node_interactivity = \"all\"\n", | |
"\n", | |
"%matplotlib inline" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Getting data from the link" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"page = requests.get(\"https://www.pcgs.com/SetRegistry/dollars/morgan-dollars-major-sets/morgan-dollars-basic-set-circulation-strikes-1878-1921/publishedset/12698\")\n", | |
"soup = BeautifulSoup(page.content, 'html.parser')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Web Scraping" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame()\n", | |
"for tr in soup.find_all('tr')[1:]:\n", | |
" tds = tr.find_all('td')\n", | |
" if tds[1].find('a')==None:\n", | |
" df = df.append(pd.Series([np.NAN,tds[2].text,tds[3].text,tds[4].text,tds[5].text,tds[6].text,\n", | |
" tds[7].text,tds[8].text,tds[9].text,tds[10].text]),ignore_index=True)\n", | |
" else:\n", | |
" df = df.append(pd.Series([tds[1].find('a')['href'],tds[2].text,tds[3].text,tds[4].text,tds[5].text,tds[6].text,\n", | |
" tds[7].text,tds[8].text,tds[9].text,tds[10].text]),ignore_index=True)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Data scraped from the page" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Image_Link</th>\n", | |
" <th>Item</th>\n", | |
" <th>PCGS #</th>\n", | |
" <th>Date</th>\n", | |
" <th>Denom</th>\n", | |
" <th>Grade</th>\n", | |
" <th>PCGS # POP</th>\n", | |
" <th>PCGS # POP Higher</th>\n", | |
" <th>Pop</th>\n", | |
" <th>Pop Higher</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>https://images.pcgs.com/CoinFacts/83747140_800...</td>\n", | |
" <td>\\n1878 8TF\\n</td>\n", | |
" <td>7072</td>\n", | |
" <td>1878 8TF</td>\n", | |
" <td>$1</td>\n", | |
" <td>MS65</td>\n", | |
" <td>704</td>\n", | |
" <td>129</td>\n", | |
" <td>830</td>\n", | |
" <td>141</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>https://images.pcgs.com/CoinFacts/81947412_800...</td>\n", | |
" <td>\\n1878 7TF\\n</td>\n", | |
" <td>7074</td>\n", | |
" <td>1878 7TF</td>\n", | |
" <td>$1</td>\n", | |
" <td>MS66+</td>\n", | |
" <td>8</td>\n", | |
" <td>0</td>\n", | |
" <td>8</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>NaN</td>\n", | |
" <td>\\n1878 7/8TF\\n</td>\n", | |
" <td>7078</td>\n", | |
" <td>1878 7/8TF</td>\n", | |
" <td>$1</td>\n", | |
" <td>MS65</td>\n", | |
" <td>344</td>\n", | |
" <td>43</td>\n", | |
" <td>632</td>\n", | |
" <td>72</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>NaN</td>\n", | |
" <td>\\n1878-CC\\n</td>\n", | |
" <td>7080</td>\n", | |
" <td>1878-CC</td>\n", | |
" <td>$1</td>\n", | |
" <td>MS66</td>\n", | |
" <td>313</td>\n", | |
" <td>53</td>\n", | |
" <td>328</td>\n", | |
" <td>53</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>NaN</td>\n", | |
" <td>\\n1878-S\\n</td>\n", | |
" <td>7082</td>\n", | |
" <td>1878-S</td>\n", | |
" <td>$1</td>\n", | |
" <td>MS66+</td>\n", | |
" <td>88</td>\n", | |
" <td>36</td>\n", | |
" <td>90</td>\n", | |
" <td>38</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>NaN</td>\n", | |
" <td>\\n1879\\n</td>\n", | |
" <td>7084</td>\n", | |
" <td>1879</td>\n", | |
" <td>$1</td>\n", | |
" <td>MS65+</td>\n", | |
" <td>62</td>\n", | |
" <td>279</td>\n", | |
" <td>65</td>\n", | |
" <td>283</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>NaN</td>\n", | |
" <td>\\n1879-CC\\n</td>\n", | |
" <td>7086</td>\n", | |
" <td>1879-CC</td>\n", | |
" <td>$1</td>\n", | |
" <td>MS64+</td>\n", | |
" <td>58</td>\n", | |
" <td>110</td>\n", | |
" <td>90</td>\n", | |
" <td>135</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>NaN</td>\n", | |
" <td>\\n1879-O\\n</td>\n", | |
" <td>7090</td>\n", | |
" <td>1879-O</td>\n", | |
" <td>$1</td>\n", | |
" <td>MS65</td>\n", | |
" <td>464</td>\n", | |
" <td>79</td>\n", | |
" <td>468</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>NaN</td>\n", | |
" <td>\\n1879-S\\n</td>\n", | |
" <td>7092</td>\n", | |
" <td>1879-S</td>\n", | |
" <td>$1</td>\n", | |
" <td>MS67</td>\n", | |
" <td>1553</td>\n", | |
" <td>260</td>\n", | |
" <td>1610</td>\n", | |
" <td>268</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>NaN</td>\n", | |
" <td>\\n1880\\n</td>\n", | |
" <td>7096</td>\n", | |
" <td>1880</td>\n", | |
" <td>$1</td>\n", | |
" <td>MS66</td>\n", | |
" <td>190</td>\n", | |
" <td>19</td>\n", | |
" <td>201</td>\n", | |
" <td>19</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Image_Link Item PCGS # \\\n", | |
"0 https://images.pcgs.com/CoinFacts/83747140_800... \\n1878 8TF\\n 7072 \n", | |
"1 https://images.pcgs.com/CoinFacts/81947412_800... \\n1878 7TF\\n 7074 \n", | |
"2 NaN \\n1878 7/8TF\\n 7078 \n", | |
"3 NaN \\n1878-CC\\n 7080 \n", | |
"4 NaN \\n1878-S\\n 7082 \n", | |
"5 NaN \\n1879\\n 7084 \n", | |
"6 NaN \\n1879-CC\\n 7086 \n", | |
"7 NaN \\n1879-O\\n 7090 \n", | |
"8 NaN \\n1879-S\\n 7092 \n", | |
"9 NaN \\n1880\\n 7096 \n", | |
"\n", | |
" Date Denom Grade PCGS # POP PCGS # POP Higher Pop Pop Higher \n", | |
"0 1878 8TF $1 MS65 704 129 830 141 \n", | |
"1 1878 7TF $1 MS66+ 8 0 8 1 \n", | |
"2 1878 7/8TF $1 MS65 344 43 632 72 \n", | |
"3 1878-CC $1 MS66 313 53 328 53 \n", | |
"4 1878-S $1 MS66+ 88 36 90 38 \n", | |
"5 1879 $1 MS65+ 62 279 65 283 \n", | |
"6 1879-CC $1 MS64+ 58 110 90 135 \n", | |
"7 1879-O $1 MS65 464 79 468 80 \n", | |
"8 1879-S $1 MS67 1553 260 1610 268 \n", | |
"9 1880 $1 MS66 190 19 201 19 " | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.columns = ['Image_Link','Item','PCGS #','Date','Denom','Grade','PCGS # POP','PCGS # POP Higher','Pop','Pop Higher']\n", | |
"df.head(10)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Links of all images" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 https://images.pcgs.com/CoinFacts/83747140_800...\n", | |
"1 https://images.pcgs.com/CoinFacts/81947412_800...\n", | |
"11 https://images.pcgs.com/TrueView/28120389_800x...\n", | |
"24 https://images.pcgs.com/TrueView/25288620_800x...\n", | |
"31 https://images.pcgs.com/CoinFacts/27669756_800...\n", | |
"34 https://images.pcgs.com/CoinFacts/83887969_800...\n", | |
"35 https://images.pcgs.com/TrueView/83892824_800x...\n", | |
"43 https://images.pcgs.com/CoinFacts/83556710_800...\n", | |
"50 https://images.pcgs.com/TrueView/80612582_800x...\n", | |
"54 https://images.pcgs.com/CoinFacts/80596870_800...\n", | |
"61 https://images.pcgs.com/CoinFacts/81875418_800...\n", | |
"65 https://images.pcgs.com/TrueView/80607578_800x...\n", | |
"66 https://images.pcgs.com/SecurePlus/25528880_80...\n", | |
"71 https://images.pcgs.com/CoinFacts/25386815_800...\n", | |
"87 https://images.pcgs.com/TrueView/82994298_800x...\n", | |
"Name: Image_Link, dtype: object" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"images = df['Image_Link'][df.Image_Link.notnull()]\n", | |
"images" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Displaying each image together with its link" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/CoinFacts/83747140_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/CoinFacts/83747140_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/CoinFacts/81947412_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/CoinFacts/81947412_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/TrueView/28120389_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/TrueView/28120389_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/TrueView/25288620_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/TrueView/25288620_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/CoinFacts/27669756_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/CoinFacts/27669756_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/CoinFacts/83887969_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/CoinFacts/83887969_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/TrueView/83892824_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/TrueView/83892824_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/CoinFacts/83556710_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/CoinFacts/83556710_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/TrueView/80612582_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/TrueView/80612582_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/CoinFacts/80596870_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/CoinFacts/80596870_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/CoinFacts/81875418_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/CoinFacts/81875418_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/TrueView/80607578_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/TrueView/80607578_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/SecurePlus/25528880_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/SecurePlus/25528880_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/CoinFacts/25386815_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/CoinFacts/25386815_800x600.jpg\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<img src=\"https://images.pcgs.com/TrueView/82994298_800x600.jpg\" width=\"500\" height=\"600\"/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Image object>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Link: https://images.pcgs.com/TrueView/82994298_800x600.jpg\n" | |
] | |
} | |
], | |
"source": [ | |
"index = images.index\n", | |
"for i in xrange(len(index)):\n", | |
" Image(url= images[index[i]],width=500, height=600)\n", | |
" print 'Link: ',images[index[i]]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment