mrizwan47 · September 16, 2018 19:31
diff --git a/amazon-scraping.ipynb b/amazon-scraping.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Scraping From Amazon and hitting JungleScout API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import requests\n",
    "import re\n",
    "import urllib.parse\n",
    "from IPython.core.display import display, HTML"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import Excel File"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Your first 5 rows look like this:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ASIN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B0014CHDYO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B072PCLQDK</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>B00076HTH4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>B00KCNUJ24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>B000BWP6HY</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         ASIN\n",
       "0  B0014CHDYO\n",
       "1  B072PCLQDK\n",
       "2  B00076HTH4\n",
       "3  B00KCNUJ24\n",
       "4  B000BWP6HY"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Change this:\n",
    "upload_file_name = '19905.xlsx'\n",
    "\n",
    "\n",
    "data = pd.read_excel('data/{}'.format(upload_file_name))\n",
    "print( \"Your first 5 rows look like this:\" )\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Request Daily Token"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Your daily token is: FkAm2B8Tw7yvGnN8j3um8A==\n"
     ]
    }
   ],
   "source": [
    "token_req = requests.get(url='https://junglescoutpro.herokuapp.com/api/v1/users/[email protected]').json()\n",
    "token = token_req['daily_token']\n",
    "\n",
    "print('Your daily token is: {}'.format(token))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "B0014CHDYO: 60\n",
      "B072PCLQDK: 368\n",
      "B00076HTH4: 50\n",
      "B00KCNUJ24: 5\n",
      "B000BWP6HY: < 5\n",
      "B00DFVNWXS: < 5\n",
      "B008JQF9I0: < 5\n",
      "B00WI1CXKY: < 5\n",
      "B06XWBC5H1: < 5\n",
      "B00A3PHR94: < 5\n",
      "B00UG0Z892: < 5\n",
      "B01AYOKMP4: < 5\n",
      "B00DWGFW52: < 5\n",
      "B00DHCBR7I: < 5\n",
      "B002XV9JLU: < 5\n",
      "B002UUV2DW: 13645\n",
      "B007XO02IQ: 18\n",
      "B00BMCGM2K: < 5\n",
      "B00LRN42WG: FAILED, sales rank not found\n",
      "B00I9SI40S: 3156\n",
      "B01KMC5C2G: < 5\n",
      "B078RSPRMJ: FAILED, sales rank not found\n",
      "B00CJQ5ASK: 9\n",
      "B00PWA83LQ: < 5\n",
      "B00YFZ8LKC: < 5\n",
      "B000O8MZQK: < 5\n",
      "B01KMC5BZE: < 5\n",
      "B0006G4Z5Y: < 5\n",
      "B00JA55MNG: 8\n",
      "B0085EYBPI: < 5\n",
      "B000255PBC: 90\n",
      "B000EY4YLS: 3649\n",
      "B000Q5NDHG: 40\n",
      "B01C4K7N8A: < 5\n",
      "B01N3Q6IDH: < 5\n",
      "B0017JG296: < 5\n",
      "B0021L8XBO: 48\n",
      "B002SX5H00: < 5\n",
      "B004YCJPT2: 301\n",
      "B009C06HWO: < 5\n",
      "B00AFBKZUO: 76\n",
      "B00CJNGKO6: 68\n",
      "B00M6SYB6S: < 5\n",
      "B00TYZHUKA: 137\n",
      "B015690284: < 5\n",
      "B017QM2OPS: 59\n",
      "B01AT0VSSI: 60\n",
      "B01B45AMIO: 40\n",
      "B01N0SNRLJ: 30\n",
      "B009P4QGLO: < 5\n",
      "B00B35JA3E: < 5\n",
      "B01D9OC4SO: < 5\n",
      "B01N3VVUK7: < 5\n",
      "B0713PVDVB: < 5\n",
      "B008Y1PJBW: 963\n",
      "B00B3RQCGA: 88\n",
      "B01IJZFZ5W: < 5\n",
      "B0050OQ3AW: < 5\n",
      "B0089VTAFS: < 5\n",
      "B00DFVOA52: < 5\n",
      "B00JSMWE7I: < 5\n",
      "B00K0PQII8: 2331\n",
      "B01GKE273W: < 5\n",
      "B06WW8WHD3: < 5\n",
      "B004HIM5AO: < 5\n",
      "B007ROC212: < 5\n",
      "B00HC8XCD0: < 5\n",
      "B00JSMVMXK: < 5\n",
      "B00RA1H92Y: 18\n",
      "B016ZZ6ER6: < 5\n",
      "B0006G53ZA: < 5\n",
      "B00EZM8O52: < 5\n",
      "B01M4J5B4S: < 5\n",
      "B01MXGTJ63: < 5\n",
      "B01N06D3VR: < 5\n",
      "B0006N9YNK: < 5\n",
      "B0035HB75C: < 5\n",
      "B004LO7MNO: 81\n",
      "B0053AUWYQ: < 5\n",
      "B01L1YHG2I: < 5\n",
      "B01N139898: < 5\n",
      "B06VXY29ZK: < 5\n",
      "B06WD8137L: < 5\n",
      "B06X3ZY21K: < 5\n",
      "B06X94VWLX: N.A.\n",
      "B0002AQW64: < 5\n",
      "B001ATBBUS: 30\n",
      "B0035H5HSU: < 5\n",
      "B00C6UXR0W: < 5\n",
      "B01M97ROYT: < 5\n",
      "B06VXY29NG: < 5\n",
      "B000255N0A: 443\n",
      "B0002DH3EG: < 5\n",
      "B000JKO1T2: 8\n",
      "B007CE4FPI: < 5\n",
      "B00A27W2YI: < 5\n",
      "B00CJQ3PWI: 30\n",
      "B00CWHKJFU: < 5\n",
      "B00D04REDE: < 5\n",
      "B00T85F1J8: < 5\n",
      "B01N0WQ9S9: < 5\n",
      "B06VY5DXY7: < 5\n",
      "B0018CJNDO: < 5\n",
      "B009VBTZUK: < 5\n",
      "B00CJQ5FYO: < 5\n",
      "B016YXFMO0: < 5\n",
      "B004HIM51S: < 5\n",
      "B004WXPVF0: < 5\n",
      "B009VBQD5A: < 5\n",
      "B00YJLAN9E: < 5\n",
      "B010UNGW94: FAILED, sales rank not found\n",
      "B016YXFIRQ: FAILED, sales rank not found\n",
      "B01GKE1MC4: < 5\n",
      "B01GKTFELE: < 5\n",
      "B06VY5F42N: N.A.\n",
      "B06WVGV3N2: < 5\n",
      "B0002AQ0N4: < 5\n",
      "B000R4KWNE: 60\n",
      "B0011Z67KU: < 5\n",
      "B003DASH9U: 502\n",
      "B005GW8DJC: < 5\n",
      "B005ZSHUFK: < 5\n",
      "B00BCYGJHG: < 5\n",
      "B00BCYGNRC: < 5\n",
      "B00CJQ3LI6: < 5\n",
      "B00JSMX5XA: 10\n",
      "B018K7ICJ0: < 5\n",
      "B01IJZFYYY: < 5\n",
      "B01M62HRLH: < 5\n",
      "B0002DI65G: < 5\n",
      "B0002DI8CW: < 5\n",
      "B0032GEEJ2: < 5\n",
      "B00CJQ3SWU: 22\n",
      "B00JSMWCF2: < 5\n",
      "B00YJL29PU: < 5\n",
      "B01EMUX2X0: 125\n",
      "B01M21E7MY: < 5\n",
      "B072R1VD16: < 5\n",
      "B0002AQIA4: < 5\n",
      "B0002AT14O: 28\n",
      "B0002DJZX8: 1246\n",
      "B003OCXQDE: 423\n",
      "B00I4YWY9O: < 5\n",
      "B0035HB8YC: < 5\n",
      "B00CJQ3P3C: < 5\n",
      "B00494JWY8: 2348\n",
      "Ready to save!\n"
     ]
    }
   ],
   "source": [
    "cols = ['ASIN', 'Estimated Sales']\n",
    "lst = []\n",
    "for asin in data['ASIN']:\n",
    "    if not \"Not found on Amazon\" == asin:\n",
    "        product_url = 'https://www.amazon.com/dp/{}'.format(asin)\n",
    "        page_content = requests.get(url=product_url).content\n",
    "        m = re.search('SalesRank.*?\\#([0-9,]+)\\s+[a-z]+\\s+(.*?)\\s+\\(', str(page_content))\n",
    "        if m:\n",
    "            salesrank = m.groups()[0].replace(',', '')\n",
    "            category = urllib.parse.quote(m.groups()[1])\n",
    "            estSalesReq = requests.get(\"https://junglescoutpro.herokuapp.com/api/v1/est_sales?store=us&asin={}&rank={}&category={}&dailyToken={}\".format(asin, salesrank, category, token), headers={'referer': product_url}).json()\n",
    "            lst.append([asin, estSalesReq['estSalesResult']])\n",
    "            print( '{}: {}'.format(asin, estSalesReq['estSalesResult']))\n",
    "        else:\n",
    "            \n",
    "            rank_cat_req = requests.get(\"https://junglescoutpro.herokuapp.com/api/v1/est_sales?store=us&asin={}&dailyToken={}\".format(asin, token), headers={'referer': product_url}).json()            \n",
    "            if not 'rank' in rank_cat_req.keys():\n",
    "                lst.append([asin, 'FAILED, sales rank not found'])\n",
    "                print( '{}: FAILED, sales rank not found'.format(asin))\n",
    "            else:\n",
    "                salesrank = rank_cat_req['rank']\n",
    "                category = urllib.parse.quote(rank_cat_req['category'])            \n",
    "                estSalesReq = requests.get(\"https://junglescoutpro.herokuapp.com/api/v1/est_sales?store=us&asin={}&rank={}&category={}&dailyToken={}\".format(asin, salesrank, category, token), headers={'referer': product_url}).json()\n",
    "                \n",
    "                lst.append([asin, estSalesReq['estSalesResult']])\n",
    "                print( '{}: {}'.format(asin, estSalesReq['estSalesResult']))\n",
    "print( 'Ready to save!' )\n",
    "new_df = pd.DataFrame(lst, columns=['ASIN', 'Estimated Sales'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save CSV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<a target=\"_blank\" href=\"data/Saved CSVs/test.csv\">Download File</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Change this (existing file will be replaced)\n",
    "save_file_name = 'test.csv'\n",
    "\n",
    "\n",
    "new_df.to_csv('data/Saved CSVs/{}'.format(save_file_name), index=False)\n",
    "display(HTML('<a target=\"_blank\" href=\"data/Saved CSVs/{}\">Download File</a>'.format(save_file_name)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Single ASIN Check"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "B0002DJZX8: 1243\n"
     ]
    }
   ],
   "source": [
    "# Change this:\n",
    "asin_number = \"B0002DJZX8\"\n",
    "\n",
    "\n",
    "if asin_number:\n",
    "        product_url = 'https://www.amazon.com/dp/{}'.format(asin_number)\n",
    "        page_content = requests.get(url=product_url).content\n",
    "        m = re.search('SalesRank.*?\\#([0-9,]+)\\s+[a-z]+\\s+(.*?)\\s+\\(', str(page_content))\n",
    "        if m:\n",
    "            salesrank = m.groups()[0].replace(',', '')\n",
    "            category = urllib.parse.quote(m.groups()[1])\n",
    "            estSalesReq = requests.get(\"https://junglescoutpro.herokuapp.com/api/v1/est_sales?store=us&asin={}&rank={}&category={}&dailyToken={}\".format(asin_number, salesrank, category, token), headers={'referer': product_url}).json()\n",
    "            print( '{}: {}'.format(asin_number, estSalesReq['estSalesResult']))\n",
    "        else:\n",
    "            \n",
    "            rank_cat_req = requests.get(\"https://junglescoutpro.herokuapp.com/api/v1/est_sales?store=us&asin={}&dailyToken={}\".format(asin_number, token), headers={'referer': product_url}).json()            \n",
    "            if not 'rank' in rank_cat_req.keys():\n",
    "                print( '{}: FAILED, sales rank not found'.format(asin_number))\n",
    "            else:\n",
    "                salesrank = rank_cat_req['rank']\n",
    "                category = urllib.parse.quote(rank_cat_req['category'])            \n",
    "                estSalesReq = requests.get(\"https://junglescoutpro.herokuapp.com/api/v1/est_sales?store=us&asin={}&rank={}&category={}&dailyToken={}\".format(asin_number, salesrank, category, token), headers={'referer': product_url}).json()\n",
    "                \n",
    "                print( '{}: {}'.format(asin_number, estSalesReq['estSalesResult']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Scraping From Amazon and hitting JungleScout API"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 36,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import requests\n",
	"import re\n",
	"import urllib.parse\n",
	"from IPython.core.display import display, HTML"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Import Excel File"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 46,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Your first 5 rows look like this:\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style>\n",
	" .dataframe thead tr:only-child th {\n",
	" text-align: right;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: left;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>ASIN</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>B0014CHDYO</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>B072PCLQDK</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>B00076HTH4</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>B00KCNUJ24</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>B000BWP6HY</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	"         ASIN\n",
	"0  B0014CHDYO\n",
	"1  B072PCLQDK\n",
	"2  B00076HTH4\n",
	"3  B00KCNUJ24\n",
	"4  B000BWP6HY"
	]
	},
	"execution_count": 46,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Change this:\n",
	"upload_file_name = '19905.xlsx'\n",
	"\n",
	"\n",
	"data = pd.read_excel('data/{}'.format(upload_file_name))\n",
	"print( \"Your first 5 rows look like this:\" )\n",
	"data.head()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Request Daily Token"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 47,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Your daily token is: FkAm2B8Tw7yvGnN8j3um8A==\n"
	]
	}
	],
	"source": [
	"token_req = requests.get(url='https://junglescoutpro.herokuapp.com/api/v1/users/[email protected]').json()\n",
	"token = token_req['daily_token']\n",
	"\n",
	"print('Your daily token is: {}'.format(token))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Process"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 48,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"B0014CHDYO: 60\n",
	"B072PCLQDK: 368\n",
	"B00076HTH4: 50\n",
	"B00KCNUJ24: 5\n",
	"B000BWP6HY: < 5\n",
	"B00DFVNWXS: < 5\n",
	"B008JQF9I0: < 5\n",
	"B00WI1CXKY: < 5\n",
	"B06XWBC5H1: < 5\n",
	"B00A3PHR94: < 5\n",
	"B00UG0Z892: < 5\n",
	"B01AYOKMP4: < 5\n",
	"B00DWGFW52: < 5\n",
	"B00DHCBR7I: < 5\n",
	"B002XV9JLU: < 5\n",
	"B002UUV2DW: 13645\n",
	"B007XO02IQ: 18\n",
	"B00BMCGM2K: < 5\n",
	"B00LRN42WG: FAILED, sales rank not found\n",
	"B00I9SI40S: 3156\n",
	"B01KMC5C2G: < 5\n",
	"B078RSPRMJ: FAILED, sales rank not found\n",
	"B00CJQ5ASK: 9\n",
	"B00PWA83LQ: < 5\n",
	"B00YFZ8LKC: < 5\n",
	"B000O8MZQK: < 5\n",
	"B01KMC5BZE: < 5\n",
	"B0006G4Z5Y: < 5\n",
	"B00JA55MNG: 8\n",
	"B0085EYBPI: < 5\n",
	"B000255PBC: 90\n",
	"B000EY4YLS: 3649\n",
	"B000Q5NDHG: 40\n",
	"B01C4K7N8A: < 5\n",
	"B01N3Q6IDH: < 5\n",
	"B0017JG296: < 5\n",
	"B0021L8XBO: 48\n",
	"B002SX5H00: < 5\n",
	"B004YCJPT2: 301\n",
	"B009C06HWO: < 5\n",
	"B00AFBKZUO: 76\n",
	"B00CJNGKO6: 68\n",
	"B00M6SYB6S: < 5\n",
	"B00TYZHUKA: 137\n",
	"B015690284: < 5\n",
	"B017QM2OPS: 59\n",
	"B01AT0VSSI: 60\n",
	"B01B45AMIO: 40\n",
	"B01N0SNRLJ: 30\n",
	"B009P4QGLO: < 5\n",
	"B00B35JA3E: < 5\n",
	"B01D9OC4SO: < 5\n",
	"B01N3VVUK7: < 5\n",
	"B0713PVDVB: < 5\n",
	"B008Y1PJBW: 963\n",
	"B00B3RQCGA: 88\n",
	"B01IJZFZ5W: < 5\n",
	"B0050OQ3AW: < 5\n",
	"B0089VTAFS: < 5\n",
	"B00DFVOA52: < 5\n",
	"B00JSMWE7I: < 5\n",
	"B00K0PQII8: 2331\n",
	"B01GKE273W: < 5\n",
	"B06WW8WHD3: < 5\n",
	"B004HIM5AO: < 5\n",
	"B007ROC212: < 5\n",
	"B00HC8XCD0: < 5\n",
	"B00JSMVMXK: < 5\n",
	"B00RA1H92Y: 18\n",
	"B016ZZ6ER6: < 5\n",
	"B0006G53ZA: < 5\n",
	"B00EZM8O52: < 5\n",
	"B01M4J5B4S: < 5\n",
	"B01MXGTJ63: < 5\n",
	"B01N06D3VR: < 5\n",
	"B0006N9YNK: < 5\n",
	"B0035HB75C: < 5\n",
	"B004LO7MNO: 81\n",
	"B0053AUWYQ: < 5\n",
	"B01L1YHG2I: < 5\n",
	"B01N139898: < 5\n",
	"B06VXY29ZK: < 5\n",
	"B06WD8137L: < 5\n",
	"B06X3ZY21K: < 5\n",
	"B06X94VWLX: N.A.\n",
	"B0002AQW64: < 5\n",
	"B001ATBBUS: 30\n",
	"B0035H5HSU: < 5\n",
	"B00C6UXR0W: < 5\n",
	"B01M97ROYT: < 5\n",
	"B06VXY29NG: < 5\n",
	"B000255N0A: 443\n",
	"B0002DH3EG: < 5\n",
	"B000JKO1T2: 8\n",
	"B007CE4FPI: < 5\n",
	"B00A27W2YI: < 5\n",
	"B00CJQ3PWI: 30\n",
	"B00CWHKJFU: < 5\n",
	"B00D04REDE: < 5\n",
	"B00T85F1J8: < 5\n",
	"B01N0WQ9S9: < 5\n",
	"B06VY5DXY7: < 5\n",
	"B0018CJNDO: < 5\n",
	"B009VBTZUK: < 5\n",
	"B00CJQ5FYO: < 5\n",
	"B016YXFMO0: < 5\n",
	"B004HIM51S: < 5\n",
	"B004WXPVF0: < 5\n",
	"B009VBQD5A: < 5\n",
	"B00YJLAN9E: < 5\n",
	"B010UNGW94: FAILED, sales rank not found\n",
	"B016YXFIRQ: FAILED, sales rank not found\n",
	"B01GKE1MC4: < 5\n",
	"B01GKTFELE: < 5\n",
	"B06VY5F42N: N.A.\n",
	"B06WVGV3N2: < 5\n",
	"B0002AQ0N4: < 5\n",
	"B000R4KWNE: 60\n",
	"B0011Z67KU: < 5\n",
	"B003DASH9U: 502\n",
	"B005GW8DJC: < 5\n",
	"B005ZSHUFK: < 5\n",
	"B00BCYGJHG: < 5\n",
	"B00BCYGNRC: < 5\n",
	"B00CJQ3LI6: < 5\n",
	"B00JSMX5XA: 10\n",
	"B018K7ICJ0: < 5\n",
	"B01IJZFYYY: < 5\n",
	"B01M62HRLH: < 5\n",
	"B0002DI65G: < 5\n",
	"B0002DI8CW: < 5\n",
	"B0032GEEJ2: < 5\n",
	"B00CJQ3SWU: 22\n",
	"B00JSMWCF2: < 5\n",
	"B00YJL29PU: < 5\n",
	"B01EMUX2X0: 125\n",
	"B01M21E7MY: < 5\n",
	"B072R1VD16: < 5\n",
	"B0002AQIA4: < 5\n",
	"B0002AT14O: 28\n",
	"B0002DJZX8: 1246\n",
	"B003OCXQDE: 423\n",
	"B00I4YWY9O: < 5\n",
	"B0035HB8YC: < 5\n",
	"B00CJQ3P3C: < 5\n",
	"B00494JWY8: 2348\n",
	"Ready to save!\n"
	]
	}
	],
	"source": [
	"cols = ['ASIN', 'Estimated Sales']\n",
	"lst = []\n",
	"for asin in data['ASIN']:\n",
	"    if not \"Not found on Amazon\" == asin:\n",
	"        product_url = 'https://www.amazon.com/dp/{}'.format(asin)\n",
	"        page_content = requests.get(url=product_url).content\n",
	"        m = re.search('SalesRank.*?\\#([0-9,]+)\\s+[a-z]+\\s+(.*?)\\s+\\(', str(page_content))\n",
	"        if m:\n",
	"            salesrank = m.groups()[0].replace(',', '')\n",
	"            category = urllib.parse.quote(m.groups()[1])\n",
	"            estSalesReq = requests.get(\"https://junglescoutpro.herokuapp.com/api/v1/est_sales?store=us&asin={}&rank={}&category={}&dailyToken={}\".format(asin, salesrank, category, token), headers={'referer': product_url}).json()\n",
	"            lst.append([asin, estSalesReq['estSalesResult']])\n",
	"            print( '{}: {}'.format(asin, estSalesReq['estSalesResult']))\n",
	"        else:\n",
	"            \n",
	"            rank_cat_req = requests.get(\"https://junglescoutpro.herokuapp.com/api/v1/est_sales?store=us&asin={}&dailyToken={}\".format(asin, token), headers={'referer': product_url}).json()            \n",
	"            if not 'rank' in rank_cat_req.keys():\n",
	"                lst.append([asin, 'FAILED, sales rank not found'])\n",
	"                print( '{}: FAILED, sales rank not found'.format(asin))\n",
	"            else:\n",
	"                salesrank = rank_cat_req['rank']\n",
	"                category = urllib.parse.quote(rank_cat_req['category'])            \n",
	"                estSalesReq = requests.get(\"https://junglescoutpro.herokuapp.com/api/v1/est_sales?store=us&asin={}&rank={}&category={}&dailyToken={}\".format(asin, salesrank, category, token), headers={'referer': product_url}).json()\n",
	"                \n",
	"                lst.append([asin, estSalesReq['estSalesResult']])\n",
	"                print( '{}: {}'.format(asin, estSalesReq['estSalesResult']))\n",
	"print( 'Ready to save!' )\n",
	"new_df = pd.DataFrame(lst, columns=['ASIN', 'Estimated Sales'])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Save CSV"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 49,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<a target=\"_blank\" href=\"data/Saved CSVs/test.csv\">Download File</a>"
	],
	"text/plain": [
	"<IPython.core.display.HTML object>"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"# Change this (existing file will be replaced)\n",
	"save_file_name = 'test.csv'\n",
	"\n",
	"\n",
	"new_df.to_csv('data/Saved CSVs/{}'.format(save_file_name), index=False)\n",
	"display(HTML('<a target=\"_blank\" href=\"data/Saved CSVs/{}\">Download File</a>'.format(save_file_name)))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"collapsed": true
	},
	"source": [
	"# Single ASIN Check"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 56,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"B0002DJZX8: 1243\n"
	]
	}
	],
	"source": [
	"# Change this:\n",
	"asin_number = \"B0002DJZX8\"\n",
	"\n",
	"\n",
	"if asin_number:\n",
	"        product_url = 'https://www.amazon.com/dp/{}'.format(asin_number)\n",
	"        page_content = requests.get(url=product_url).content\n",
	"        m = re.search('SalesRank.*?\\#([0-9,]+)\\s+[a-z]+\\s+(.*?)\\s+\\(', str(page_content))\n",
	"        if m:\n",
	"            salesrank = m.groups()[0].replace(',', '')\n",
	"            category = urllib.parse.quote(m.groups()[1])\n",
	"            estSalesReq = requests.get(\"https://junglescoutpro.herokuapp.com/api/v1/est_sales?store=us&asin={}&rank={}&category={}&dailyToken={}\".format(asin_number, salesrank, category, token), headers={'referer': product_url}).json()\n",
	"            print( '{}: {}'.format(asin_number, estSalesReq['estSalesResult']))\n",
	"        else:\n",
	"            \n",
	"            rank_cat_req = requests.get(\"https://junglescoutpro.herokuapp.com/api/v1/est_sales?store=us&asin={}&dailyToken={}\".format(asin_number, token), headers={'referer': product_url}).json()            \n",
	"            if not 'rank' in rank_cat_req.keys():\n",
	"                print( '{}: FAILED, sales rank not found'.format(asin_number))\n",
	"            else:\n",
	"                salesrank = rank_cat_req['rank']\n",
	"                category = urllib.parse.quote(rank_cat_req['category'])            \n",
	"                estSalesReq = requests.get(\"https://junglescoutpro.herokuapp.com/api/v1/est_sales?store=us&asin={}&rank={}&category={}&dailyToken={}\".format(asin_number, salesrank, category, token), headers={'referer': product_url}).json()\n",
	"                \n",
	"                print( '{}: {}'.format(asin_number, estSalesReq['estSalesResult']))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}