Skip to content

Instantly share code, notes, and snippets.

@aa-gamJain
Last active January 15, 2018 06:25
Show Gist options
  • Save aa-gamJain/9eae9aecaf5a0f4d5c9a300a2a60a20d to your computer and use it in GitHub Desktop.
Save aa-gamJain/9eae9aecaf5a0f4d5c9a300a2a60a20d to your computer and use it in GitHub Desktop.
Coinioc Task 1
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Importing all Libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import requests\n",
"import numpy as np\n",
"import pandas as pd\n",
"from bs4 import BeautifulSoup\n",
"\n",
"from IPython.display import Image, display\n",
"from IPython.core.display import HTML \n",
"\n",
"pd.set_option('display.max_rows',100)\n",
"from IPython.core.interactiveshell import InteractiveShell\n",
"InteractiveShell.ast_node_interactivity = \"all\"\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Getting data from the link"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Published PCGS Set Registry page for the Morgan dollars basic set (circulation strikes, 1878-1921).\n",
"SET_URL = \"https://www.pcgs.com/SetRegistry/dollars/morgan-dollars-major-sets/morgan-dollars-basic-set-circulation-strikes-1878-1921/publishedset/12698\"\n",
"\n",
"# A timeout keeps the cell from hanging indefinitely if the server never answers.\n",
"page = requests.get(SET_URL, timeout=30)\n",
"# Fail loudly on a 4xx/5xx response instead of silently parsing an error page.\n",
"page.raise_for_status()\n",
"soup = BeautifulSoup(page.content, 'html.parser')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Web Scraping"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Gather one record per table row and build the DataFrame in a single step.\n",
"# Growing a DataFrame with df.append() inside the loop re-copies it on every\n",
"# iteration, and DataFrame.append was removed in pandas 2.0.\n",
"records = []\n",
"for tr in soup.find_all('tr')[1:]:  # [1:] skips the first <tr> (presumably the header row)\n",
"    tds = tr.find_all('td')\n",
"    if len(tds) < 11:\n",
"        # tds[1]..tds[10] are read below; a shorter row cannot supply them.\n",
"        continue\n",
"    anchor = tds[1].find('a')\n",
"    # Use the anchor's href as the image link when the cell has one, NaN otherwise.\n",
"    image_link = anchor['href'] if anchor is not None else np.nan\n",
"    records.append([image_link] + [td.text for td in tds[2:11]])\n",
"df = pd.DataFrame(records)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Data scraped from the page"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Image_Link</th>\n",
" <th>Item</th>\n",
" <th>PCGS #</th>\n",
" <th>Date</th>\n",
" <th>Denom</th>\n",
" <th>Grade</th>\n",
" <th>PCGS # POP</th>\n",
" <th>PCGS # POP Higher</th>\n",
" <th>Pop</th>\n",
" <th>Pop Higher</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>https://images.pcgs.com/CoinFacts/83747140_800...</td>\n",
" <td>\\n1878 8TF\\n</td>\n",
" <td>7072</td>\n",
" <td>1878 8TF</td>\n",
" <td>$1</td>\n",
" <td>MS65</td>\n",
" <td>704</td>\n",
" <td>129</td>\n",
" <td>830</td>\n",
" <td>141</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>https://images.pcgs.com/CoinFacts/81947412_800...</td>\n",
" <td>\\n1878 7TF\\n</td>\n",
" <td>7074</td>\n",
" <td>1878 7TF</td>\n",
" <td>$1</td>\n",
" <td>MS66+</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NaN</td>\n",
" <td>\\n1878 7/8TF\\n</td>\n",
" <td>7078</td>\n",
" <td>1878 7/8TF</td>\n",
" <td>$1</td>\n",
" <td>MS65</td>\n",
" <td>344</td>\n",
" <td>43</td>\n",
" <td>632</td>\n",
" <td>72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>\\n1878-CC\\n</td>\n",
" <td>7080</td>\n",
" <td>1878-CC</td>\n",
" <td>$1</td>\n",
" <td>MS66</td>\n",
" <td>313</td>\n",
" <td>53</td>\n",
" <td>328</td>\n",
" <td>53</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td>\\n1878-S\\n</td>\n",
" <td>7082</td>\n",
" <td>1878-S</td>\n",
" <td>$1</td>\n",
" <td>MS66+</td>\n",
" <td>88</td>\n",
" <td>36</td>\n",
" <td>90</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>NaN</td>\n",
" <td>\\n1879\\n</td>\n",
" <td>7084</td>\n",
" <td>1879</td>\n",
" <td>$1</td>\n",
" <td>MS65+</td>\n",
" <td>62</td>\n",
" <td>279</td>\n",
" <td>65</td>\n",
" <td>283</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>NaN</td>\n",
" <td>\\n1879-CC\\n</td>\n",
" <td>7086</td>\n",
" <td>1879-CC</td>\n",
" <td>$1</td>\n",
" <td>MS64+</td>\n",
" <td>58</td>\n",
" <td>110</td>\n",
" <td>90</td>\n",
" <td>135</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>NaN</td>\n",
" <td>\\n1879-O\\n</td>\n",
" <td>7090</td>\n",
" <td>1879-O</td>\n",
" <td>$1</td>\n",
" <td>MS65</td>\n",
" <td>464</td>\n",
" <td>79</td>\n",
" <td>468</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>NaN</td>\n",
" <td>\\n1879-S\\n</td>\n",
" <td>7092</td>\n",
" <td>1879-S</td>\n",
" <td>$1</td>\n",
" <td>MS67</td>\n",
" <td>1553</td>\n",
" <td>260</td>\n",
" <td>1610</td>\n",
" <td>268</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>\\n1880\\n</td>\n",
" <td>7096</td>\n",
" <td>1880</td>\n",
" <td>$1</td>\n",
" <td>MS66</td>\n",
" <td>190</td>\n",
" <td>19</td>\n",
" <td>201</td>\n",
" <td>19</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Image_Link Item PCGS # \\\n",
"0 https://images.pcgs.com/CoinFacts/83747140_800... \\n1878 8TF\\n 7072 \n",
"1 https://images.pcgs.com/CoinFacts/81947412_800... \\n1878 7TF\\n 7074 \n",
"2 NaN \\n1878 7/8TF\\n 7078 \n",
"3 NaN \\n1878-CC\\n 7080 \n",
"4 NaN \\n1878-S\\n 7082 \n",
"5 NaN \\n1879\\n 7084 \n",
"6 NaN \\n1879-CC\\n 7086 \n",
"7 NaN \\n1879-O\\n 7090 \n",
"8 NaN \\n1879-S\\n 7092 \n",
"9 NaN \\n1880\\n 7096 \n",
"\n",
" Date Denom Grade PCGS # POP PCGS # POP Higher Pop Pop Higher \n",
"0 1878 8TF $1 MS65 704 129 830 141 \n",
"1 1878 7TF $1 MS66+ 8 0 8 1 \n",
"2 1878 7/8TF $1 MS65 344 43 632 72 \n",
"3 1878-CC $1 MS66 313 53 328 53 \n",
"4 1878-S $1 MS66+ 88 36 90 38 \n",
"5 1879 $1 MS65+ 62 279 65 283 \n",
"6 1879-CC $1 MS64+ 58 110 90 135 \n",
"7 1879-O $1 MS65 464 79 468 80 \n",
"8 1879-S $1 MS67 1553 260 1610 268 \n",
"9 1880 $1 MS66 190 19 201 19 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Label the ten scraped fields, then preview the first ten rows.\n",
"column_names = [\n",
"    'Image_Link', 'Item', 'PCGS #', 'Date', 'Denom',\n",
"    'Grade', 'PCGS # POP', 'PCGS # POP Higher', 'Pop', 'Pop Higher',\n",
"]\n",
"df.columns = column_names\n",
"df.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Links of all images"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 https://images.pcgs.com/CoinFacts/83747140_800...\n",
"1 https://images.pcgs.com/CoinFacts/81947412_800...\n",
"11 https://images.pcgs.com/TrueView/28120389_800x...\n",
"24 https://images.pcgs.com/TrueView/25288620_800x...\n",
"31 https://images.pcgs.com/CoinFacts/27669756_800...\n",
"34 https://images.pcgs.com/CoinFacts/83887969_800...\n",
"35 https://images.pcgs.com/TrueView/83892824_800x...\n",
"43 https://images.pcgs.com/CoinFacts/83556710_800...\n",
"50 https://images.pcgs.com/TrueView/80612582_800x...\n",
"54 https://images.pcgs.com/CoinFacts/80596870_800...\n",
"61 https://images.pcgs.com/CoinFacts/81875418_800...\n",
"65 https://images.pcgs.com/TrueView/80607578_800x...\n",
"66 https://images.pcgs.com/SecurePlus/25528880_80...\n",
"71 https://images.pcgs.com/CoinFacts/25386815_800...\n",
"87 https://images.pcgs.com/TrueView/82994298_800x...\n",
"Name: Image_Link, dtype: object"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the rows that have an image URL; the original row labels are preserved.\n",
"has_image = df['Image_Link'].notnull()\n",
"images = df.loc[has_image, 'Image_Link']\n",
"images"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Displaying each image with its link"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/CoinFacts/83747140_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/CoinFacts/83747140_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/CoinFacts/81947412_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/CoinFacts/81947412_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/TrueView/28120389_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/TrueView/28120389_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/TrueView/25288620_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/TrueView/25288620_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/CoinFacts/27669756_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/CoinFacts/27669756_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/CoinFacts/83887969_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/CoinFacts/83887969_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/TrueView/83892824_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/TrueView/83892824_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/CoinFacts/83556710_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/CoinFacts/83556710_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/TrueView/80612582_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/TrueView/80612582_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/CoinFacts/80596870_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/CoinFacts/80596870_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/CoinFacts/81875418_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/CoinFacts/81875418_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/TrueView/80607578_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/TrueView/80607578_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/SecurePlus/25528880_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/SecurePlus/25528880_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/CoinFacts/25386815_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/CoinFacts/25386815_800x600.jpg\n"
]
},
{
"data": {
"text/html": [
"<img src=\"https://images.pcgs.com/TrueView/82994298_800x600.jpg\" width=\"500\" height=\"600\"/>"
],
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Link: https://images.pcgs.com/TrueView/82994298_800x600.jpg\n"
]
}
],
"source": [
"# Render each coin image followed by its URL.\n",
"# display() is called explicitly so the images appear regardless of the\n",
"# InteractiveShell.ast_node_interactivity setting, and print() is written as a\n",
"# single-argument call so the cell runs on both Python 2 and Python 3.\n",
"for url in images:\n",
"    display(Image(url=url, width=500, height=600))\n",
"    print('Link: ' + url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment