Skip to content

Instantly share code, notes, and snippets.

@mrizwan47
Created September 16, 2018 19:31
Show Gist options
  • Save mrizwan47/c7ebe55da843501fe62522579577ff68 to your computer and use it in GitHub Desktop.
Save mrizwan47/c7ebe55da843501fe62522579577ff68 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scraping From Amazon and hitting JungleScout API"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import requests\n",
"import re\n",
"import urllib.parse\n",
"from IPython.core.display import display, HTML"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import Excel File"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Your first 5 rows look like this:\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ASIN</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>B0014CHDYO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>B072PCLQDK</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>B00076HTH4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>B00KCNUJ24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>B000BWP6HY</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ASIN\n",
"0 B0014CHDYO\n",
"1 B072PCLQDK\n",
"2 B00076HTH4\n",
"3 B00KCNUJ24\n",
"4 B000BWP6HY"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Workbook to load from the data/ folder -- change this:\n",
"upload_file_name = '19905.xlsx'\n",
"\n",
"\n",
"# Resolve the path first so the read below is a single, obvious step.\n",
"excel_path = 'data/{}'.format(upload_file_name)\n",
"data = pd.read_excel(excel_path)\n",
"\n",
"print(\"Your first 5 rows look like this:\")\n",
"data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Request Daily Token"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Your daily token is: FkAm2B8Tw7yvGnN8j3um8A==\n"
]
}
],
"source": [
"# Fetch today's token; the est_sales requests in later cells pass it as dailyToken.\n",
"# NOTE(review): the address in this URL reads '[email protected]', which looks like a\n",
"# redaction placeholder -- presumably the real account e-mail belongs here; confirm.\n",
"token_resp = requests.get(\n",
"    url='https://junglescoutpro.herokuapp.com/api/v1/users/[email protected]',\n",
"    timeout=30,  # seconds; requests waits indefinitely unless a timeout is given\n",
")\n",
"# Fail here on an HTTP error instead of on a confusing JSON/KeyError further down.\n",
"token_resp.raise_for_status()\n",
"token_req = token_resp.json()\n",
"token = token_req['daily_token']\n",
"\n",
"print('Your daily token is: {}'.format(token))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Process"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"B0014CHDYO: 60\n",
"B072PCLQDK: 368\n",
"B00076HTH4: 50\n",
"B00KCNUJ24: 5\n",
"B000BWP6HY: < 5\n",
"B00DFVNWXS: < 5\n",
"B008JQF9I0: < 5\n",
"B00WI1CXKY: < 5\n",
"B06XWBC5H1: < 5\n",
"B00A3PHR94: < 5\n",
"B00UG0Z892: < 5\n",
"B01AYOKMP4: < 5\n",
"B00DWGFW52: < 5\n",
"B00DHCBR7I: < 5\n",
"B002XV9JLU: < 5\n",
"B002UUV2DW: 13645\n",
"B007XO02IQ: 18\n",
"B00BMCGM2K: < 5\n",
"B00LRN42WG: FAILED, sales rank not found\n",
"B00I9SI40S: 3156\n",
"B01KMC5C2G: < 5\n",
"B078RSPRMJ: FAILED, sales rank not found\n",
"B00CJQ5ASK: 9\n",
"B00PWA83LQ: < 5\n",
"B00YFZ8LKC: < 5\n",
"B000O8MZQK: < 5\n",
"B01KMC5BZE: < 5\n",
"B0006G4Z5Y: < 5\n",
"B00JA55MNG: 8\n",
"B0085EYBPI: < 5\n",
"B000255PBC: 90\n",
"B000EY4YLS: 3649\n",
"B000Q5NDHG: 40\n",
"B01C4K7N8A: < 5\n",
"B01N3Q6IDH: < 5\n",
"B0017JG296: < 5\n",
"B0021L8XBO: 48\n",
"B002SX5H00: < 5\n",
"B004YCJPT2: 301\n",
"B009C06HWO: < 5\n",
"B00AFBKZUO: 76\n",
"B00CJNGKO6: 68\n",
"B00M6SYB6S: < 5\n",
"B00TYZHUKA: 137\n",
"B015690284: < 5\n",
"B017QM2OPS: 59\n",
"B01AT0VSSI: 60\n",
"B01B45AMIO: 40\n",
"B01N0SNRLJ: 30\n",
"B009P4QGLO: < 5\n",
"B00B35JA3E: < 5\n",
"B01D9OC4SO: < 5\n",
"B01N3VVUK7: < 5\n",
"B0713PVDVB: < 5\n",
"B008Y1PJBW: 963\n",
"B00B3RQCGA: 88\n",
"B01IJZFZ5W: < 5\n",
"B0050OQ3AW: < 5\n",
"B0089VTAFS: < 5\n",
"B00DFVOA52: < 5\n",
"B00JSMWE7I: < 5\n",
"B00K0PQII8: 2331\n",
"B01GKE273W: < 5\n",
"B06WW8WHD3: < 5\n",
"B004HIM5AO: < 5\n",
"B007ROC212: < 5\n",
"B00HC8XCD0: < 5\n",
"B00JSMVMXK: < 5\n",
"B00RA1H92Y: 18\n",
"B016ZZ6ER6: < 5\n",
"B0006G53ZA: < 5\n",
"B00EZM8O52: < 5\n",
"B01M4J5B4S: < 5\n",
"B01MXGTJ63: < 5\n",
"B01N06D3VR: < 5\n",
"B0006N9YNK: < 5\n",
"B0035HB75C: < 5\n",
"B004LO7MNO: 81\n",
"B0053AUWYQ: < 5\n",
"B01L1YHG2I: < 5\n",
"B01N139898: < 5\n",
"B06VXY29ZK: < 5\n",
"B06WD8137L: < 5\n",
"B06X3ZY21K: < 5\n",
"B06X94VWLX: N.A.\n",
"B0002AQW64: < 5\n",
"B001ATBBUS: 30\n",
"B0035H5HSU: < 5\n",
"B00C6UXR0W: < 5\n",
"B01M97ROYT: < 5\n",
"B06VXY29NG: < 5\n",
"B000255N0A: 443\n",
"B0002DH3EG: < 5\n",
"B000JKO1T2: 8\n",
"B007CE4FPI: < 5\n",
"B00A27W2YI: < 5\n",
"B00CJQ3PWI: 30\n",
"B00CWHKJFU: < 5\n",
"B00D04REDE: < 5\n",
"B00T85F1J8: < 5\n",
"B01N0WQ9S9: < 5\n",
"B06VY5DXY7: < 5\n",
"B0018CJNDO: < 5\n",
"B009VBTZUK: < 5\n",
"B00CJQ5FYO: < 5\n",
"B016YXFMO0: < 5\n",
"B004HIM51S: < 5\n",
"B004WXPVF0: < 5\n",
"B009VBQD5A: < 5\n",
"B00YJLAN9E: < 5\n",
"B010UNGW94: FAILED, sales rank not found\n",
"B016YXFIRQ: FAILED, sales rank not found\n",
"B01GKE1MC4: < 5\n",
"B01GKTFELE: < 5\n",
"B06VY5F42N: N.A.\n",
"B06WVGV3N2: < 5\n",
"B0002AQ0N4: < 5\n",
"B000R4KWNE: 60\n",
"B0011Z67KU: < 5\n",
"B003DASH9U: 502\n",
"B005GW8DJC: < 5\n",
"B005ZSHUFK: < 5\n",
"B00BCYGJHG: < 5\n",
"B00BCYGNRC: < 5\n",
"B00CJQ3LI6: < 5\n",
"B00JSMX5XA: 10\n",
"B018K7ICJ0: < 5\n",
"B01IJZFYYY: < 5\n",
"B01M62HRLH: < 5\n",
"B0002DI65G: < 5\n",
"B0002DI8CW: < 5\n",
"B0032GEEJ2: < 5\n",
"B00CJQ3SWU: 22\n",
"B00JSMWCF2: < 5\n",
"B00YJL29PU: < 5\n",
"B01EMUX2X0: 125\n",
"B01M21E7MY: < 5\n",
"B072R1VD16: < 5\n",
"B0002AQIA4: < 5\n",
"B0002AT14O: 28\n",
"B0002DJZX8: 1246\n",
"B003OCXQDE: 423\n",
"B00I4YWY9O: < 5\n",
"B0035HB8YC: < 5\n",
"B00CJQ3P3C: < 5\n",
"B00494JWY8: 2348\n",
"Ready to save!\n"
]
}
],
"source": [
"cols = ['ASIN', 'Estimated Sales']\n",
"REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely unless a timeout is given\n",
"EST_SALES_URL = 'https://junglescoutpro.herokuapp.com/api/v1/est_sales'\n",
"# Raw string so the regex escapes are not parsed as (invalid) string escapes.\n",
"SALES_RANK_RE = re.compile(r'SalesRank.*?\\#([0-9,]+)\\s+[a-z]+\\s+(.*?)\\s+\\(')\n",
"\n",
"\n",
"def estimate_sales(asin, daily_token):\n",
"    \"\"\"Look up the JungleScout sales estimate for a single ASIN.\n",
"\n",
"    The sales rank and category are scraped from the Amazon product page; when\n",
"    the pattern does not match, the est_sales endpoint is asked for them instead.\n",
"\n",
"    Args:\n",
"        asin: product identifier used in the product URL and the API query.\n",
"        daily_token: value sent as the dailyToken query parameter.\n",
"\n",
"    Returns:\n",
"        The 'estSalesResult' value from the API response, or a 'FAILED, ...'\n",
"        message when the rank or the estimate is not available.\n",
"\n",
"    Raises:\n",
"        requests.RequestException: on a network failure or timeout.\n",
"        ValueError: when an API response body is not valid JSON.\n",
"    \"\"\"\n",
"    product_url = 'https://www.amazon.com/dp/{}'.format(asin)\n",
"    referer = {'referer': product_url}\n",
"    # The token is base64-like (it ends in '=='); percent-encode it because a raw\n",
"    # '+' in a query string is conventionally decoded as a space.\n",
"    query_token = urllib.parse.quote(daily_token, safe='')\n",
"\n",
"    # str() of the raw bytes is kept on purpose: the pattern has always been\n",
"    # matched against that representation of the page.\n",
"    page_content = requests.get(url=product_url, timeout=REQUEST_TIMEOUT).content\n",
"    m = SALES_RANK_RE.search(str(page_content))\n",
"    if m:\n",
"        salesrank = m.group(1).replace(',', '')\n",
"        category = m.group(2)\n",
"    else:\n",
"        # The page did not yield a rank: ask the API for rank and category.\n",
"        rank_cat_req = requests.get(\n",
"            '{}?store=us&asin={}&dailyToken={}'.format(EST_SALES_URL, asin, query_token),\n",
"            headers=referer,\n",
"            timeout=REQUEST_TIMEOUT\n",
"        ).json()\n",
"        if 'rank' not in rank_cat_req:\n",
"            return 'FAILED, sales rank not found'\n",
"        salesrank = rank_cat_req['rank']\n",
"        category = rank_cat_req.get('category') or ''\n",
"\n",
"    estSalesReq = requests.get(\n",
"        '{}?store=us&asin={}&rank={}&category={}&dailyToken={}'.format(\n",
"            EST_SALES_URL, asin, salesrank, urllib.parse.quote(category), query_token),\n",
"        headers=referer,\n",
"        timeout=REQUEST_TIMEOUT\n",
"    ).json()\n",
"    return estSalesReq.get('estSalesResult', 'FAILED, no estimate returned')\n",
"\n",
"\n",
"lst = []\n",
"for asin in data['ASIN']:\n",
"    # Skip blank cells and cells holding the 'Not found on Amazon' marker.\n",
"    if pd.isna(asin) or asin == 'Not found on Amazon':\n",
"        continue\n",
"    try:\n",
"        est_sales = estimate_sales(asin, token)\n",
"    except (requests.RequestException, ValueError) as err:\n",
"        # Record the failure and carry on so results gathered so far are kept.\n",
"        # Only the error type is shown: the message may contain the token-bearing URL.\n",
"        est_sales = 'FAILED, request error ({})'.format(type(err).__name__)\n",
"    lst.append([asin, est_sales])\n",
"    print('{}: {}'.format(asin, est_sales))\n",
"print('Ready to save!')\n",
"new_df = pd.DataFrame(lst, columns=cols)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Save CSV"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<a target=\"_blank\" href=\"data/Saved CSVs/test.csv\">Download File</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Change this (existing file will be replaced)\n",
"save_file_name = 'test.csv'\n",
"\n",
"\n",
"# Build the path once so the file written and the link shown cannot drift apart.\n",
"save_path = 'data/Saved CSVs/{}'.format(save_file_name)\n",
"new_df.to_csv(save_path, index=False)\n",
"\n",
"# Percent-encode the href: the folder name contains a space, and a file name with\n",
"# '#', '?' or quotes would otherwise break the link. '/' is left as-is by quote().\n",
"download_href = urllib.parse.quote(save_path)\n",
"display(HTML('<a target=\"_blank\" href=\"{}\">Download File</a>'.format(download_href)))"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Single ASIN Check"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"B0002DJZX8: 1243\n"
]
}
],
"source": [
"# Change this:\n",
"asin_number = \"B0002DJZX8\"\n",
"\n",
"\n",
"if asin_number:\n",
"    product_url = 'https://www.amazon.com/dp/{}'.format(asin_number)\n",
"    est_sales_url = 'https://junglescoutpro.herokuapp.com/api/v1/est_sales'\n",
"    referer = {'referer': product_url}\n",
"    request_timeout = 30  # seconds; requests waits indefinitely unless a timeout is given\n",
"    # The token is base64-like (it ends in '=='); percent-encode it because a raw\n",
"    # '+' in a query string is conventionally decoded as a space.\n",
"    query_token = urllib.parse.quote(token, safe='')\n",
"\n",
"    # str() of the raw bytes is kept on purpose: the pattern has always been\n",
"    # matched against that representation of the page.\n",
"    page_content = requests.get(url=product_url, timeout=request_timeout).content\n",
"    # Raw string so the regex escapes are not parsed as (invalid) string escapes.\n",
"    m = re.search(r'SalesRank.*?\\#([0-9,]+)\\s+[a-z]+\\s+(.*?)\\s+\\(', str(page_content))\n",
"    if m:\n",
"        rank_and_category = (m.group(1).replace(',', ''), m.group(2))\n",
"    else:\n",
"        # The page did not yield a rank: ask the API for rank and category.\n",
"        rank_cat_req = requests.get(\n",
"            '{}?store=us&asin={}&dailyToken={}'.format(est_sales_url, asin_number, query_token),\n",
"            headers=referer,\n",
"            timeout=request_timeout\n",
"        ).json()\n",
"        if 'rank' in rank_cat_req:\n",
"            rank_and_category = (rank_cat_req['rank'], rank_cat_req.get('category') or '')\n",
"        else:\n",
"            rank_and_category = None\n",
"\n",
"    if rank_and_category is None:\n",
"        print('{}: FAILED, sales rank not found'.format(asin_number))\n",
"    else:\n",
"        salesrank, category = rank_and_category\n",
"        # One request serves both the scraped and the API-provided rank.\n",
"        estSalesReq = requests.get(\n",
"            '{}?store=us&asin={}&rank={}&category={}&dailyToken={}'.format(\n",
"                est_sales_url, asin_number, salesrank, urllib.parse.quote(category), query_token),\n",
"            headers=referer,\n",
"            timeout=request_timeout\n",
"        ).json()\n",
"        print('{}: {}'.format(asin_number, estSalesReq.get('estSalesResult', 'FAILED, no estimate returned')))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment