Skip to content

Instantly share code, notes, and snippets.

@GeorgyGol
Created October 18, 2018 18:06
Show Gist options
  • Save GeorgyGol/b5a88ecc93ea4db6d84493437125c466 to your computer and use it in GitHub Desktop.
Save GeorgyGol/b5a88ecc93ea4db6d84493437125c466 to your computer and use it in GitHub Desktop.
Fuels data read from WWW
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Find fuel prices\n",
"from various WWW"
]
},
{
"cell_type": "code",
"execution_count": 160,
"metadata": {},
"outputs": [],
"source": [
"from bs4 import BeautifulSoup\n",
"import pandas as pd\n",
"import sqlite3\n",
"import requests\n",
"import re\n",
"import numpy as np\n",
"from datetime import datetime as dt\n",
"from datetime import date\n",
"import sqlalchemy as sa\n",
"from itertools import chain\n",
"\n",
"%run '../serv_energy.py'\n",
"\n",
"cstrEIABrent_spot=r'https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=RBRTE&f={0}'\n",
"cstrEIAWTI_spot=r'https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=RWTC&f={0}'\n",
"\n",
"#print(get_date_list(1987)) # check import serv_energy\n",
"\n",
"#conn=sqlite3.connect('fuels.sqlite3')\n",
"eng_co=sa.create_engine('sqlite+pysqlite:///fuels.sqlite3') #, echo=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"at this address - http://www.worldbank.org/en/research/commodity-markets - is a XLSX-file with monthly prices for resources from the World Bank. OIL, COAL, GAS ets.\n",
"\n",
"Next func read and parse this file, return ALL columns"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {},
"outputs": [],
"source": [
"strWB=r'http://www.worldbank.org/en/research/commodity-markets'\n",
"\n",
"def read_wb_prices():\n",
" sessWB=requests.session()\n",
" wb_html=sessWB.get(strWB)\n",
" wb_html.encoding=wb_html.apparent_encoding\n",
" wb_soup=BeautifulSoup(wb_html.text, 'html.parser')\n",
"\n",
" str_price_pattern=re.compile('Monthly prices', re.MULTILINE)\n",
" url_prices=wb_soup('a', string=str_price_pattern)[0]['href']\n",
" pdfWBPrices=pd.read_excel(r'/home/egor/Загрузки/CMOHistoricalDataMonthly.xlsx', sheet_name='Monthly Prices', \n",
" header=6, na_values='..', index_col=0)\n",
" pdfWBPrices.index.name='Date'\n",
" pdfWBPrices.index=pd.to_datetime(pdfWBPrices.index, format='%YM%m')\n",
" pdfWBPrices.applymap(lambda x: float(x))\n",
" return pdfWBPrices"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## OIL\n",
"Brent, WTI, spot and futures\n",
"### Sources: \n",
"1. https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=RBRTE&f=M"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### BRENT & WTI, spot, dollars per barrel\n",
"source: USA EIA (Energy Information Administration)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"dctFrequency={'Daily':'D', 'Weekly':'W', 'Monthly':'M', 'Year':'a'}\n",
"\n",
"def EIA_spot(strURL, strName):\n",
" r=requests.get(strURL)\n",
" soup=BeautifulSoup(r.text, 'html.parser')\n",
" tbls=soup.findAll('table', class_='FloatTitle')\n",
"\n",
" sPrices=pd.Series()\n",
" for tr in tbls[0].findAll('tr'):\n",
" lstTD=[i.text.strip() for i in tr.findAll('td')]\n",
" if len(lstTD)>1:\n",
" dctD=dict(zip(get_date_list(int(lstTD[0])), list(map(lambda x: x if x!='' else np.nan, lstTD[1:]))))\n",
" sPrices=sPrices.append(pd.Series(dctD))\n",
" sPrices.name=strName\n",
" return sPrices.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WTI: last date = Aug-2018, last val = 68.06\n",
"BRENT: last date = Aug-2018, last val = 72.53\n"
]
}
],
"source": [
"#getBRENT().to_sql('BRENT_spot', con=conn, if_exists='replace', index_label='DATE')\n",
"brent=EIA_spot(cstrEIABrent_spot.format(dctFrequency['Monthly']), 'BRENT')\n",
"wti=EIA_spot(cstrEIAWTI_spot.format(dctFrequency['Monthly']), 'WTI')\n",
" \n",
"print_last_point('WTI', wti.index[-1], wti.iloc[-1])\n",
"print_last_point('BRENT', brent.index[-1], brent.iloc[-1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### write readed, merjed and sorted data to OIL_SPOT table"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" BRENT WTI\n",
"Date \n",
"Apr-2018 72.11 66.25\n",
"May-2018 76.98 69.98\n",
"Jun-2018 74.41 67.87\n",
"Jul-2018 74.25 70.98\n",
"Aug-2018 72.53 68.06\n"
]
}
],
"source": [
"pdfOIL=FuelsDataFrame(brent).merge(FuelsDataFrame(wti), how='outer', left_index=True, right_index=True)\n",
"pdfOIL.index.name='Date'\n",
"pdfOIL=pdfOIL.sort_index_date(inplace=False)\n",
"\n",
"#pdfOIL.to_sql('OIL_SPOT', eng_co, if_exists='upsert')\n",
"#print(pdfOIL.tail(5))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## OIL from WorldBank file\n",
"Brent, Dubai, WTI - without URALS"
]
},
{
"cell_type": "code",
"execution_count": 209,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/egor/anaconda3/lib/python3.6/site-packages/pandas/io/excel.py:329: FutureWarning: The `sheetname` keyword is deprecated, use `sheet_name` instead\n",
" **kwds)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" CRUDE_PETRO CRUDE_BRENT CRUDE_DUBAI CRUDE_WTI\n",
"Date \n",
"May-2018 73.430000 76.65 73.66 69.98\n",
"Jun-2018 71.976667 75.19 73.22 67.52\n",
"Jul-2018 72.666667 74.44 72.72 70.84\n",
"Aug-2018 71.083333 73.13 72.13 67.99\n",
"Sep-2018 75.363333 78.86 77.02 70.21\n"
]
}
],
"source": [
"pdfOils=read_wb_prices().filter(regex='CRUDE').dropna(axis=0, how='all')\n",
"oils=FuelsDataFrame(pdfOils)\n",
"oils.index_to_string()\n",
"oils.to_sql('OILS', eng_co, if_exists='upsert')\n",
"print(oils.tail(5))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Urals\n",
"Source - http://economy.gov.ru/minec/activity/sections/foreignEconomicActivity/monitoring/index?WCM_PI=1&WCM_PageSize.556a41804309ce5898fbbb1aee474279=40 - base URL from MinEconom RF.\n",
"It will be a little tricky:\n",
"1. Read URL above\n",
"2. Find in WWW key words 'нефть Urals'.\n",
"3. Go to URL of this item\n",
"4. Read URL price (considering date of publishing)\n",
"5. Read URALS price from this URL (and date)\n",
"6. Upsert URALS table in Database (or CREATE for first time). If CREATE - read ALL items from MinEco and... Everything is difficult there. Many formats, so it's easy make table dy hand (timeline not so long)"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"All Items in site 40\n",
"Page title = Министерство экономического развития Российской Федерации - Деятельность - Конъюнктура мировых товарных рынков\n"
]
}
],
"source": [
"#lItemsOnPage=1000 # for CREATING Urals table\n",
"lItemsOnPage=40 # for UPSERT Urals table\n",
"\n",
"strEconomyGovRU=r'http://economy.gov.ru/minec/activity/sections/foreignEconomicActivity/monitoring/index?WCM_PI=1&WCM_PageSize.556a41804309ce5898fbbb1aee474279={0}'\n",
"\n",
"#class=\"docs_bookmarks\"\n",
"sess1=requests.session()\n",
"docs=sess1.get(strEconomyGovRU.format(lItemsOnPage))\n",
"docs.encoding=docs.apparent_encoding\n",
"spEcon=BeautifulSoup(docs.text, 'html.parser')\n",
"\n",
"tblDocs=spEcon.findAll('table', {'class': re.compile('list_docs\\d+')})\n",
"\n",
"print('All Items in site ', len(tblDocs))\n",
"print('Page title = ', spEcon.find('title').text)"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Date Title \\\n",
"0 03.10.2018 г. О средней цене на нефть сорта «Юралс» за сентя... \n",
"1 10.09.2018 г. Сообщение о средних за истекший налоговый пери... \n",
"2 04.09.2018 г. О средней цене на нефть сорта «Юралс» за авгус... \n",
"3 15.08.2018 г. Сообщение о средних за истекший налоговый пери... \n",
"4 13.08.2018 г. О средней цене на нефть сорта «Юралс» за июль ... \n",
"5 13.07.2018 г. Сообщение о средних за истекший налоговый пери... \n",
"6 04.07.2018 г. О средней цене на нефть сорта «Юралс» за июнь ... \n",
"7 14.06.2018 г. Сообщение о средних за истекший налоговый пери... \n",
"8 07.06.2018 г. О средней цене на нефть сорта «Юралс» за май 2... \n",
"9 15.05.2018 г. Сообщение о средних за истекший налоговый пери... \n",
"10 07.05.2018 г. О средней цене на нефть сорта «Юралс» за апрел... \n",
"11 12.04.2018 г. Сообщение о средних за истекший налоговый пери... \n",
"12 12.04.2018 г. О средней цене на нефть сорта «Юралс» за март ... \n",
"13 13.03.2018 г. Сообщение о средних за истекший налоговый пери... \n",
"14 13.03.2018 г. О средней цене на нефть сорта «Юралс» за февра... \n",
"15 14.02.2018 г. Сообщение о средних за истекший налоговый пери... \n",
"16 14.02.2018 г. О средней цене на нефть сорта «Юралс» за январ... \n",
"17 15.01.2018 г. Сообщение о средних за истекший налоговый пери... \n",
"18 15.01.2018 г. О средней цене на нефть сорта «Юралс» за декаб... \n",
"19 15.12.2017 г. Сообщение о средних за истекший налоговый пери... \n",
"20 21.11.2017 г. Среднемесячные мировые цены на отдельные товары \n",
"21 10.11.2017 г. Сообщение о средних за истекший налоговый пери... \n",
"22 11.10.2017 г. Сообщение о средних за истекший налоговый пери... \n",
"23 15.09.2017 г. Сообщение о средних за истекший налоговый пери... \n",
"24 11.08.2017 г. Сообщение о средних за истекший налоговый пери... \n",
"25 13.07.2017 г. Канада прекратила антидемпинговое расследовани... \n",
"26 13.07.2017 г. Министерство торговли и промышленности Индии и... \n",
"27 13.07.2017 г. Сообщение о средних за истекший налоговый пери... \n",
"28 06.06.2017 г. Сообщение о средних за истекший налоговый пери... \n",
"29 15.05.2017 г. Сообщение о средних за истекший налоговый пери... \n",
"30 24.04.2017 г. Украина начала промежуточный пересмотр антидем... \n",
"31 24.04.2017 г. Украина начала антидемпинговое расследование в... \n",
"32 24.04.2017 г. Бразилия начинает антидемпинговое расследовани... \n",
"33 24.04.2017 г. Украина ввела антидемпинговые меры в отношении... \n",
"34 13.04.2017 г. Сообщение о средних за истекший налоговый пери... \n",
"35 22.03.2017 г. Среднемесячные мировые цены на отдельные товар... \n",
"36 15.03.2017 г. Сообщение о средних за истекший налоговый пери... \n",
"37 15.02.2017 г. Сообщение о средних за истекший налоговый пери... \n",
"38 26.01.2017 г. Среднемесячные мировые цены на отдельные товары \n",
"39 25.01.2017 г. Перечень и отдельные показатели мониторинга не... \n",
"\n",
" URL \n",
"0 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"1 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"2 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"3 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"4 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"5 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"6 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"7 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"8 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"9 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"10 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"11 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"12 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"13 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"14 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"15 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"16 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"17 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"18 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"19 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"20 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"21 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"22 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"23 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"24 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"25 /wps/wcm/connect/economylib4/mer/about/structu... \n",
"26 /wps/wcm/connect/economylib4/mer/about/structu... \n",
"27 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"28 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"29 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"30 /wps/wcm/connect/economylib4/mer/about/structu... \n",
"31 /wps/wcm/connect/economylib4/mer/about/structu... \n",
"32 /wps/wcm/connect/economylib4/mer/about/structu... \n",
"33 /wps/wcm/connect/economylib4/mer/about/structu... \n",
"34 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"35 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"36 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"37 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"38 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"39 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
" Date Title \\\n",
"0 03.10.2018 г. О средней цене на нефть сорта «Юралс» за сентя... \n",
"2 04.09.2018 г. О средней цене на нефть сорта «Юралс» за авгус... \n",
"4 13.08.2018 г. О средней цене на нефть сорта «Юралс» за июль ... \n",
"6 04.07.2018 г. О средней цене на нефть сорта «Юралс» за июнь ... \n",
"8 07.06.2018 г. О средней цене на нефть сорта «Юралс» за май 2... \n",
"10 07.05.2018 г. О средней цене на нефть сорта «Юралс» за апрел... \n",
"12 12.04.2018 г. О средней цене на нефть сорта «Юралс» за март ... \n",
"14 13.03.2018 г. О средней цене на нефть сорта «Юралс» за февра... \n",
"16 14.02.2018 г. О средней цене на нефть сорта «Юралс» за январ... \n",
"18 15.01.2018 г. О средней цене на нефть сорта «Юралс» за декаб... \n",
"\n",
" URL \n",
"0 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"2 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"4 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"6 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"8 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"10 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"12 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"14 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"16 /wps/wcm/connect/economylib4/mer/activity/sect... \n",
"18 /wps/wcm/connect/economylib4/mer/activity/sect... \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/egor/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:9: UserWarning: This pattern has match groups. To actually get the groups, use str.extract.\n",
" if __name__ == '__main__':\n"
]
}
],
"source": [
"dfMinEcoItems=pd.DataFrame(columns=['Date', 'Title', 'URL'])\n",
"\n",
"for i, tbl in enumerate(tblDocs):\n",
" dfMinEcoItems.loc[i]=[tbl.find('span', class_='list_docs_date').text.strip(), \n",
" tbl.find('span', class_='list_docs_title').text.strip(),\n",
" tbl.find('a')['href']]\n",
"\n",
"print(dfMinEcoItems)\n",
"print(dfMinEcoItems[dfMinEcoItems['Title'].str.contains(r'([нН]ефть)|(Юралс)')])"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Price\n",
"Date \n",
"Dec-2017 63.8\n",
"Jan-2018 68.6\n",
"Feb-2018 63.2\n",
"Mar-2018 63.6\n",
"Apr-2018 69.1\n",
"May-2018 75.1\n",
"Jun-2018 73.1\n",
"Jul-2018 72.8\n",
"Aug-2018 71.5\n",
"Sep-2018 76.3\n",
"(117, 1)\n"
]
}
],
"source": [
"# CHEATING\n",
"\n",
"pdf=pd.read_csv(r'urals.csv', sep=';', encoding='cp1251')\n",
"\n",
"dctTr={'Январь':'Jan', 'Февраль':'Feb', 'Март':'Mar', 'Апрель':'Apr', 'Май':'May', 'Июнь':'Jun', 'Июль':'Jul', \n",
" 'Август':'Aug', 'Сентябрь':'Sep', 'Октябрь':'Okt', 'Ноябрь':'Nov', 'Декабрь':'Dec'}\n",
"\n",
"pdf['Date']=pdf.apply(lambda x: '{0}-{1}'.format(dctMonthTranslate[x['Month']], x['Year']), axis=1)\n",
"pdfUrals=pdf[['Date', 'val']].rename(columns={'val':'Price'})\n",
"\n",
"urals_hist=FuelsDataFrame(pdfUrals)\n",
"urals_hist.set_index('Date', inplace=True)\n",
"print(urals_hist.tail(10))\n",
"urals_hist=urals_hist.sort_index_date(inplace=False)\n",
"\n",
"\n",
"urals_hist.to_sql('URALS', eng_co, if_exists='replace') # make URALS sqlite table\n",
"\n",
"print(urals_hist.shape)"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/egor/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: UserWarning: This pattern has match groups. To actually get the groups, use str.extract.\n",
" \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" Date Price\n",
"0 Sep-2018 78.13\n",
"2 Aug-2018 71.52\n",
"4 Jul-2018 72.84\n",
"6 Jun-2018 73.06\n",
"8 May-2018 75.14\n",
"10 Apr-2018 69.10\n",
"12 Mar-2018 63.58\n",
"14 Feb-2018 63.14\n",
"16 Jan-2018 68.60\n",
"18 Dec-2017 63.77\n"
]
}
],
"source": [
"#filter items with oil prices\n",
"pdfOilItems=dfMinEcoItems[dfMinEcoItems['Title'].str.contains(r'([нН]ефть)|(Юралс)')] \n",
"\n",
"strReplNew=r'http://economy.gov.ru/minec/activity/sections/'\n",
"strReplOld=r'/wps/wcm/connect/economylib4/mer/activity/sections/'\n",
"\n",
"def get_date_from_title(strTitle):\n",
" strSearch=r'(?i).*(?P<month>' + '|'.join(dctTr.keys()) + ')\\s(?P<year>\\d{4}).*'\n",
" #print(strSearch)\n",
" mo=re.match(strSearch, strTitle)\n",
" return '{0}-{1}'.format(dctMonthTranslate[mo[1].capitalize()], mo[2])\n",
" \n",
"def get_price(soup):\n",
" str_price_pattern=re.compile('^\\d+,?\\d*\\sдолл\\. США.*', re.MULTILINE)\n",
" strFind=soup('p', string=str_price_pattern)[0].text.replace(',', '.')\n",
" #print(strFind)\n",
" return re.match(r'\\d+\\.?\\d*', strFind).group(0)\n",
"\n",
"pdfUralsPrice=pd.DataFrame(columns=['Date', 'Price'])\n",
"\n",
"#get urals prices and date from items\n",
"\n",
"sess2=requests.session()\n",
"\n",
"for i, Item in pdfOilItems.iterrows():\n",
" htmlI=sess2.get(Item['URL'].replace(strReplOld, strReplNew))\n",
" htmlI.encoding=htmlI.apparent_encoding\n",
" u_i=BeautifulSoup(htmlI.text, 'html.parser')\n",
" str_title=u_i.find('title')\n",
" try:\n",
" pdfUralsPrice.loc[i]=[get_date_from_title(str_title.text), get_price(u_i)]\n",
" except:\n",
" print(str_title.text)\n",
" print(Item['URL'].replace(strReplOld, strReplNew))\n",
" print('\\n')\n",
" \n",
"\n",
"print(pdfUralsPrice)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### For UPSERT URALS Database table"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Price\n",
"Date \n",
"Dec-2017 63.77\n",
"Jan-2018 68.60\n",
"Feb-2018 63.14\n",
"Mar-2018 63.58\n",
"Apr-2018 69.10\n",
"May-2018 75.14\n",
"Jun-2018 73.06\n",
"Jul-2018 72.84\n",
"Aug-2018 71.52\n",
"Sep-2018 78.13\n"
]
}
],
"source": [
"pdfNewPrice=FuelsDataFrame(pdfUralsPrice)\n",
"pdfNewPrice.set_index('Date', inplace=True)\n",
"\n",
"pdfNewPrice=pdfNewPrice.sort_index_date()\n",
"print(pdfNewPrice)"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": [
"pdfNewPrice.to_sql('URALS', eng_co, if_exists='upsert')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Coal"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### https://ycharts.com/indicators/australia_coal_price \n",
"Australia energy, USD per Metric Ton, F.O.B. piers, Newcastle/Port Kembla; 6,300 kcal per Kilogram (11,340 btu/lb), less than 0.8%, sulfur 13% ash beginning January 2002; previously 6,667 kcal per Kilogram (12,000 btu/lb), less than 1.0% sulfur, 14% ash."
]
},
{
"cell_type": "code",
"execution_count": 172,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" AustralianEnergy\n",
"Date \n",
"aug-2018 117.34\n",
"sep-2018 114.16\n"
]
}
],
"source": [
"strAusCoal=r'https://ycharts.com/indicators/australia_coal_price'\n",
"\n",
"def australian_energy(strURL=strAusCoal):\n",
" sessY1=requests.session()\n",
" aus_coal=sessY1.get(strAusCoal)\n",
" aus_coal.encoding=aus_coal.apparent_encoding\n",
" spAC=BeautifulSoup(aus_coal.text, 'html.parser')\n",
"\n",
" tblACPrices=spAC.findAll('table', class_='histDataTable')\n",
" \n",
" dctACPrices=dict()\n",
"\n",
" for tbl in tblACPrices:\n",
" tr=tbl.findAll('tr')\n",
" for r in tr:\n",
" strDt=r.find('td', class_='col1')\n",
" \n",
" if strDt is not None:\n",
" tmpDt=strDt.text.strip().split(' ')\n",
" strDt=tmpDt[0][:3].lower()+'-'+tmpDt[2]\n",
" dtPr=r.find('td', class_='col2').text.strip()\n",
" dctACPrices.setdefault(strDt, dtPr)\n",
" srACPrices=pd.Series(dctACPrices)\n",
" srACPrices.name='AustralianEnergy'\n",
" srACPrices.index.name='Date'\n",
" return FuelsDataFrame(srACPrices)\n",
"\n",
"#pdfAC=australian_energy().sort_index_date()\n",
"#print(pdfAC.tail(2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"COALS from WorldBank file (Australian, Colombian, South Afirca"
]
},
{
"cell_type": "code",
"execution_count": 208,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/egor/anaconda3/lib/python3.6/site-packages/pandas/io/excel.py:329: FutureWarning: The `sheetname` keyword is deprecated, use `sheet_name` instead\n",
" **kwds)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" COAL_AUS COAL_COL COAL_SAFRICA\n",
"Date \n",
"May-2018 106.20 NaN 101.89\n",
"Jun-2018 114.75 NaN 103.88\n",
"Jul-2018 119.57 NaN 106.02\n",
"Aug-2018 117.34 NaN 98.48\n",
"Sep-2018 114.16 NaN 102.14\n"
]
}
],
"source": [
"pdfCoals=read_wb_prices().filter(regex='COAL').dropna(axis=0, how='all')\n",
"coals=FuelsDataFrame(pdfCoals)\n",
"coals.index_to_string()\n",
"coals.to_sql('COALS', eng_co, if_exists='upsert')\n",
"print(coals.tail(5))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### NYMEX coal futures \n",
"https://www.eia.gov/coal/markets/includes/nymex_price_data_historical.xlsx"
]
},
{
"cell_type": "code",
"execution_count": 266,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" EASTERN_RAIL WESTERN_RAIL INDONESIAN_MCCLOSKEY NYMEX_APPALACHIAN\n",
"Date \n",
"Jan-2018 71.229762 13.700676 NaN NaN\n",
"Feb-2018 71.027119 14.067229 NaN NaN\n",
"Mar-2018 69.739017 13.980978 NaN NaN\n",
"Apr-2018 65.594328 13.986752 NaN NaN\n",
"May-2018 63.425472 13.741309 NaN NaN\n",
"Jun-2018 67.126540 13.790960 NaN NaN\n",
"Jul-2018 70.359986 13.638736 NaN NaN\n",
"Aug-2018 72.992158 13.730961 NaN NaN\n",
"Sep-2018 78.081909 13.672137 NaN NaN\n",
"Oct-2018 85.291313 13.778887 NaN NaN\n"
]
}
],
"source": [
"strNYMEX_COAL=r'https://www.eia.gov/coal/markets/includes/nymex_price_data_historical.xlsx'\n",
"def read_nymex_coal(strURL=strNYMEX_COAL):\n",
" # short ton to tonne - 0.907185\n",
" k_con=0.907185\n",
" pdfNC=pd.read_excel(strURL)\n",
" pdfNC['Date']=pd.to_datetime(pdfNC['DATA_DATE'], format='%Y-%m-%d')\n",
" pdfg=pdfNC.set_index('Date').groupby(pd.Grouper(freq='M'))[['EASTERN_RAIL', 'WESTERN_RAIL', 'INDONESIAN_MCCLOSKEY', \n",
" 'NYMEX_APPALACHIAN']].mean()\n",
" return pdfg/k_con\n",
" \n",
"nymex_coal=FuelsDataFrame(read_nymex_coal())\n",
"nymex_coal.index_to_string()\n",
"nymex_coal.to_sql('NYMEX_COALS', eng_co, if_exists='upsert')\n",
"print(nymex_coal.tail(10))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Currency prices\n"
]
},
{
"cell_type": "code",
"execution_count": 260,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Value Info\n",
"Code \n",
"USD 14 USD (Доллар)\n",
"EUR 7 (EUR (Евро)\n",
"CHF 4 CHF (Швейцарский Франк)\n"
]
}
],
"source": [
"dct_Currency_translate={'USD':['14', 'USD (Доллар)'],\n",
"'EUR':['7', '(EUR (Евро)'],\n",
"'CHF':['4', 'CHF (Швейцарский Франк)'],\n",
"'GBP':['8', 'GBP (Английский Фунт Стерлингов)'],\n",
"'AMD':['18', 'AMD (Армянский Драм)'],\n",
"'AUD':['1', 'AUD (Австралийский Доллар)'],\n",
"'AZN':['19', 'AZN (Азербайджанский Манат)'],\n",
"'BGN':['2', 'BGN (Болгарский Лев)'],\n",
"'HUF':['9', 'HUF (Венгерский Форинт)'],\n",
"'BRL':['31', 'BRL (Бразильский реал)'],\n",
"'BYN':['20', 'BYN (Белорусский Рубль)'],\n",
"'CAD':['3', 'CAD (Канадский Доллар)'],\n",
"'CNY':['21', 'CNY (Китайский юань Ренминби)'],\n",
"'CZK':['5', 'CZK (Чешская Крона)'],\n",
"'DKK':['6', 'DKK (Датская Крона)'],\n",
"'INR':['22', 'INR (Индийская pупия)'],\n",
"'JPY':['10', 'JPY (Японская Иена)'],\n",
"'KGS':['23', 'KGS (Киргизский Сом)'],\n",
"'KRW':['24', 'KRW (Южнокорейский вон)'],\n",
"'KZT':['25', 'KZT (Казахский Тенге)'],\n",
"'MDL':['17', 'MDL (Молдавский лей)'],\n",
"'NOK':['26', 'NOK (Норвежская Крона)'],\n",
"'PLN':['11', 'PLN (Польская Злота)'],\n",
"'RON':['15', 'RON (Румынский Лей)'],\n",
"'SEK':['12', 'SEK (Шведская Крона)'],\n",
"'SGD':['32', 'SGD (Сингапурский доллар)'],\n",
"'TJS':['27', 'TJS (Таджикский Сомони)'],\n",
"'TMT':['28', 'TMT (Туркменский Манат)'],\n",
"'TRY':['13', 'TRY (Турецкая Лира)'],\n",
"'UAH':['16', 'UAH (Украинская Гривна)'],\n",
"'UZS':['29', 'UZS (Узбекский Сум)'],\n",
"'ZAR':['33', 'ZAR (Южноафриканский рэнд)']}\n",
"\n",
"#print(dct_Currency_translate)\n",
"\n",
"pdfCurrencyDict=pd.DataFrame.from_dict(dct_Currency_translate, orient='index', columns=['Value', 'Info'])\n",
"pdfCurrencyDict.index.name='Code'\n",
"#pdfCurrencyDict.to_sql('curr_digest', eng_co, if_exists='replace')\n",
"\n",
"print(pdfCurrencyDict.head(3))"
]
},
{
"cell_type": "code",
"execution_count": 309,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Date\n",
"Jan-2018 56.4981\n",
"Feb-2018 56.8067\n",
"Mar-2018 57.0636\n",
"Apr-2018 60.7699\n",
"May-2018 62.2309\n",
"Jun-2018 62.7685\n",
"Jul-2018 62.8619\n",
"Aug-2018 66.0764\n",
"Sep-2018 67.6661\n",
"Oct-2018 65.6081\n",
"Name: USD, dtype: object\n"
]
}
],
"source": [
"strWWW_Price=r'https://www.kursvaliut.ru/%D1%81%D1%80%D0%B5%D0%B4%D0%BD%D0%B8%D0%B9-%D0%BE%D0%B1%D0%BC%D0%B5%D0%BD%D0%BD%D1%8B%D0%B9-%D0%BA%D1%83%D1%80%D1%81-%D0%B7%D0%B0-%D0%BC%D0%B5%D1%81%D1%8F%D1%86'\n",
"lStartYear=2003\n",
"\n",
"def get_currency(name='USD', strURL=strWWW_Price, year=2018):\n",
" def get_year_prices(sess, lYear):\n",
" html=sess.post(strURL, data={'currency':str(pdfCurrencyDict.loc[name]['Value']), 'year':str(lYear), 'display':''})\n",
" html.encoding=html.apparent_encoding\n",
" soup=BeautifulSoup(html.text, 'html.parser')\n",
" tbl_curr=soup.findAll('table', class_='table table-condensed table-borderless') \n",
" dctCurr=dict()\n",
" \n",
" for r in tbl_curr[0].findAll('tr'):\n",
" tds=r.findAll('td')\n",
" if len(tds)==3:\n",
" dctCurr.setdefault('{0}-{1}'.format( dctMonthTranslate[tds[1].text], lYear), \n",
" tds[2].text.replace(',', '.').replace('RUB', '').strip())\n",
" return pd.Series(dctCurr).replace('-', np.nan).dropna()\n",
" \n",
" sessCUR=requests.session()\n",
" pdSer=None\n",
" \n",
" if type(year) is list:\n",
" pdSer=pd.concat([get_year_prices(sessCUR, str(y)) for y in year])\n",
" else:\n",
" pdSer=get_year_prices(sessCUR, str(year))\n",
" \n",
" pdSer.name=name\n",
" pdSer.index.name='Date'\n",
" return pdSer\n",
"\n",
"#print(get_currency(year=list(range(lStartYear, dt.now().year+1))))\n",
"print(get_currency(year=dt.now().year))\n"
]
},
{
"cell_type": "code",
"execution_count": 288,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" USD EUR CHF GBP ZAR AUD UAH \\\n",
"Apr-2003 31.2118 33.8671 22.6552 49.1350 NaN 19.0203 58.0955 \n",
"Apr-2004 28.6856 34.4463 22.1445 51.8878 NaN 21.4323 53.4004 \n",
"Apr-2005 27.8104 35.9935 23.2550 52.6926 NaN 21.4981 53.6130 \n",
"Apr-2006 27.5645 33.7674 21.4333 48.6204 NaN 20.2415 54.1084 \n",
"Apr-2007 25.8384 34.8919 21.3074 51.3488 NaN 21.3404 51.0500 \n",
"Apr-2008 23.5129 37.0635 23.2486 46.5852 NaN 21.8549 46.8334 \n",
"Apr-2009 33.5833 44.2798 29.2092 49.2375 NaN 23.8827 41.7683 \n",
"Apr-2010 29.1932 39.2297 27.3569 44.6934 39.7876 27.0111 36.7309 \n",
"Apr-2011 28.0840 40.5525 31.2502 45.9490 41.7470 29.6854 35.2378 \n",
"Apr-2012 29.4909 38.8145 32.2850 47.1885 37.6668 30.5124 36.7276 \n",
"\n",
" TRY CNY \n",
"Apr-2003 NaN NaN \n",
"Apr-2004 NaN NaN \n",
"Apr-2005 20.4012 NaN \n",
"Apr-2006 20.5983 NaN \n",
"Apr-2007 18.9809 33.4447 \n",
"Apr-2008 18.0427 33.5859 \n",
"Apr-2009 20.8096 49.1524 \n",
"Apr-2010 19.5380 42.7680 \n",
"Apr-2011 18.4335 43.0117 \n",
"Apr-2012 16.4938 46.7675 \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/egor/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:4: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
"of pandas will change to not sort by default.\n",
"\n",
"To accept the future behavior, pass 'sort=False'.\n",
"\n",
"To retain the current behavior and silence the warning, pass 'sort=True'.\n",
"\n",
" after removing the cwd from sys.path.\n"
]
}
],
"source": [
"pdf=pdfCurrencyDict.loc[['USD', 'EUR', 'CHF', 'GBP', 'ZAR', 'AUD', 'UAH', 'TRY', 'CNY']]\n",
"lstSerCurr = [get_currency(name=n, only_last=True) for n in pdf.index.tolist()]\n",
"\n",
"pdfAllCurrency=pd.concat(lstSerCurr, axis=1)\n",
"print(pdfAllCurrency.head(10))"
]
},
{
"cell_type": "code",
"execution_count": 291,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" USD EUR CHF GBP ZAR AUD UAH \\\n",
"Date \n",
"Jan-2003 31.8162 33.8068 23.1122 51.4064 NaN 18.5728 59.4571 \n",
"Feb-2003 31.6990 34.1878 23.2872 51.2838 NaN 18.8274 59.1510 \n",
"Mar-2003 31.4533 33.9520 23.1135 49.7896 NaN 18.9436 58.7159 \n",
"Apr-2003 31.2118 33.8671 22.6552 49.1350 NaN 19.0203 58.0955 \n",
"May-2003 30.9071 35.7382 23.5722 50.1619 NaN 20.0244 56.9717 \n",
"Jun-2003 30.4686 35.5942 23.1201 50.5924 NaN 20.2379 56.2599 \n",
"Jul-2003 30.3603 34.5600 22.3280 49.3934 NaN 20.1325 56.6282 \n",
"Aug-2003 30.3490 33.8762 21.9774 48.4264 NaN 19.7733 56.3997 \n",
"Sep-2003 30.5986 34.2997 22.1499 49.1956 NaN 20.2222 56.8598 \n",
"Oct-2003 30.1647 35.2963 22.7996 50.5292 NaN 20.8875 55.9533 \n",
"Nov-2003 29.8080 34.9241 22.4059 50.4194 NaN 21.3480 55.3104 \n",
"Dec-2003 29.4337 36.1337 23.2363 51.4962 NaN 21.7402 54.7012 \n",
"Jan-2004 28.8388 36.3770 23.2346 52.5155 NaN 22.2145 53.4945 \n",
"Feb-2004 28.5147 36.0925 22.9353 53.2670 NaN 22.2010 52.8206 \n",
"Mar-2004 28.5293 35.0178 22.3390 52.1378 NaN 21.3859 52.9490 \n",
"Apr-2004 28.6856 34.4463 22.1445 51.8878 NaN 21.4323 53.4004 \n",
"May-2004 28.9892 34.8168 22.6021 51.7607 NaN 20.4393 54.1808 \n",
"Jun-2004 29.0297 35.2982 23.2241 53.1750 NaN 20.2047 54.3604 \n",
"Jul-2004 29.0819 35.6730 23.3591 53.5538 NaN 20.7962 54.5010 \n",
"Aug-2004 29.2193 35.6284 23.1583 53.2364 NaN 20.7697 54.7076 \n",
"Sep-2004 29.2221 35.6610 23.1100 52.3872 NaN 20.4859 54.4855 \n",
"Oct-2004 29.0703 36.2872 23.5030 52.5060 NaN 21.2856 54.0400 \n",
"Nov-2004 28.5912 37.0793 24.3544 53.0830 NaN 21.9973 53.3158 \n",
"Dec-2004 27.9040 37.3896 24.3628 53.8120 NaN 21.4629 51.6965 \n",
"Jan-2005 28.0091 36.7193 23.7424 52.6316 NaN 21.4354 52.3585 \n",
"Feb-2005 27.9954 36.3810 23.4409 52.7323 NaN 21.8263 52.7165 \n",
"Mar-2005 27.6264 36.4697 23.5476 52.6579 NaN 21.6972 52.5410 \n",
"Apr-2005 27.8104 35.9935 23.2550 52.6926 NaN 21.4981 53.6130 \n",
"May-2005 27.9514 35.4847 22.9633 51.8197 NaN 21.3911 55.0478 \n",
"Jun-2005 28.4979 34.7252 22.5630 51.8809 NaN 21.8446 56.2702 \n",
"... ... ... ... ... ... ... ... \n",
"May-2016 65.8387 74.3600 67.1914 95.6037 42.6952 47.9717 26.1101 \n",
"Jun-2016 65.2193 73.2865 67.1764 93.1381 43.1428 48.1925 26.1437 \n",
"Jul-2016 64.3380 71.1955 65.5258 84.7451 44.5630 48.4197 25.9245 \n",
"Aug-2016 64.9365 72.7963 66.9585 85.1322 47.3507 49.5899 25.8900 \n",
"Sep-2016 64.5572 72.3626 66.2163 84.9709 45.9062 48.9826 24.6044 \n",
"Oct-2016 62.6200 69.1254 63.5256 77.5759 44.8684 47.7094 24.3255 \n",
"Nov-2016 64.3137 69.5389 64.6218 79.8364 46.1841 48.5275 24.9942 \n",
"Dec-2016 62.0913 65.5193 60.9074 77.5151 44.8141 45.6413 23.6765 \n",
"Jan-2017 59.6299 63.5634 59.2871 73.5076 44.0620 44.7257 21.8389 \n",
"Feb-2017 58.5394 62.4011 58.5190 73.1526 44.1501 44.8033 21.6335 \n",
"Mar-2017 58.0066 62.0003 57.8821 71.6081 44.9395 44.2328 21.4807 \n",
"Apr-2017 56.4356 60.4609 56.3883 71.2205 41.9441 42.5620 21.0070 \n",
"May-2017 56.9501 62.9553 57.7604 73.6536 43.0049 42.3591 21.5615 \n",
"Jun-2017 57.8932 64.9659 59.7508 74.0526 44.8984 43.7043 22.1735 \n",
"Jul-2017 59.6927 68.6209 62.1858 77.5080 45.4215 46.4322 22.9923 \n",
"Aug-2017 59.6127 70.3716 61.7646 77.3673 45.0163 47.2112 23.2444 \n",
"Sep-2017 57.7447 68.7917 59.9956 76.6621 43.9566 46.0204 22.0862 \n",
"Oct-2017 57.6981 67.8553 58.8295 76.1698 42.1978 44.9653 21.6633 \n",
"Nov-2017 58.9266 69.1320 59.4096 77.8999 41.8070 44.9426 22.0670 \n",
"Dec-2017 58.5739 69.3379 59.3212 78.5354 44.5812 44.7882 21.2723 \n",
"Jan-2018 56.4981 69.0200 58.8722 78.2706 46.3991 45.0472 19.7728 \n",
"Feb-2018 56.8067 70.3013 60.8589 79.5643 47.9994 44.8813 20.9090 \n",
"Mar-2018 57.0636 70.3559 60.2598 79.7545 48.2202 44.2858 21.6585 \n",
"Apr-2018 60.7699 74.6485 62.8125 85.6182 50.2763 46.7311 23.2551 \n",
"May-2018 62.2309 73.5186 62.3736 83.7698 49.6981 46.8614 23.7800 \n",
"Jun-2018 62.7685 73.2698 63.4176 83.3727 47.3390 47.0387 23.9289 \n",
"Jul-2018 62.8619 73.4467 63.1868 82.8237 46.8145 46.5159 23.8116 \n",
"Aug-2018 66.0764 76.2987 66.7733 85.0882 47.1997 48.4493 24.0199 \n",
"Sep-2018 67.6661 78.8998 69.9251 88.3078 45.8489 48.7479 24.0099 \n",
"Oct-2018 65.6081 75.7502 66.5461 85.2804 45.6494 46.9624 23.2276 \n",
"\n",
" TRY CNY \n",
"Date \n",
"Jan-2003 NaN NaN \n",
"Feb-2003 NaN NaN \n",
"Mar-2003 NaN NaN \n",
"Apr-2003 NaN NaN \n",
"May-2003 NaN NaN \n",
"Jun-2003 NaN NaN \n",
"Jul-2003 NaN NaN \n",
"Aug-2003 NaN NaN \n",
"Sep-2003 NaN NaN \n",
"Oct-2003 NaN NaN \n",
"Nov-2003 NaN NaN \n",
"Dec-2003 NaN NaN \n",
"Jan-2004 NaN NaN \n",
"Feb-2004 NaN NaN \n",
"Mar-2004 NaN NaN \n",
"Apr-2004 NaN NaN \n",
"May-2004 NaN NaN \n",
"Jun-2004 NaN NaN \n",
"Jul-2004 NaN NaN \n",
"Aug-2004 NaN NaN \n",
"Sep-2004 NaN NaN \n",
"Oct-2004 NaN NaN \n",
"Nov-2004 NaN NaN \n",
"Dec-2004 NaN NaN \n",
"Jan-2005 20.7591 NaN \n",
"Feb-2005 21.1906 NaN \n",
"Mar-2005 21.0502 NaN \n",
"Apr-2005 20.4012 NaN \n",
"May-2005 20.3525 NaN \n",
"Jun-2005 20.9148 NaN \n",
"... ... ... \n",
"May-2016 22.2887 100.7649 \n",
"Jun-2016 22.3424 98.9754 \n",
"Jul-2016 21.7446 96.3687 \n",
"Aug-2016 21.9158 97.6759 \n",
"Sep-2016 21.7700 96.7404 \n",
"Oct-2016 20.3901 93.1303 \n",
"Nov-2016 19.6414 94.0331 \n",
"Dec-2016 17.7733 89.7120 \n",
"Jan-2017 15.6957 86.5957 \n",
"Feb-2017 15.8805 85.1518 \n",
"Mar-2017 15.8052 84.1281 \n",
"Apr-2017 15.4491 81.8973 \n",
"May-2017 15.9527 82.6806 \n",
"Jun-2017 16.4476 85.0152 \n",
"Jul-2017 16.7498 88.1019 \n",
"Aug-2017 16.9654 89.2762 \n",
"Sep-2017 16.6243 87.9055 \n",
"Oct-2017 15.7183 87.0673 \n",
"Nov-2017 15.1788 88.9832 \n",
"Dec-2017 15.2280 88.8602 \n",
"Jan-2018 14.9473 88.0541 \n",
"Feb-2018 15.0265 89.9084 \n",
"Mar-2018 14.6593 90.3081 \n",
"Apr-2018 14.9516 96.4702 \n",
"May-2018 13.9869 97.6360 \n",
"Jun-2018 13.5478 97.0977 \n",
"Jul-2018 13.2051 93.6810 \n",
"Aug-2018 11.4287 96.4759 \n",
"Sep-2018 10.6613 98.7274 \n",
"Oct-2018 10.8304 95.5148 \n",
"\n",
"[190 rows x 9 columns]\n"
]
}
],
"source": [
"#print(pdfAllCurrency)\n",
"\n",
"currency=FuelsDataFrame(pdfAllCurrency)\n",
"currency=currency.sort_index_date()\n",
"currency.index.name='Date'\n",
"\n",
"currency.to_sql('CURRENCY', eng_co, if_exists='replace')\n",
"\n",
"print(currency)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Uranium Price\n",
"\n",
"https://www.cameco.com/invest/markets/uranium-price"
]
},
{
"cell_type": "code",
"execution_count": 239,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" U3O8\n",
"Date \n",
"May-2018 22.73\n",
"Jun-2018 22.65\n",
"Jul-2018 25.78\n",
"Aug-2018 26.30\n",
"Sep-2018 27.50\n"
]
}
],
"source": [
"strUraniumURL=r'https://www.cameco.com/invest/markets/uranium-price'\n",
"\n",
"\n",
"def get_uranium(strURL=strUraniumURL):\n",
" sess=requests.session()\n",
" ura=sess.get(strURL)\n",
" ura.encoding=ura.apparent_encoding\n",
" sp_ura=BeautifulSoup(ura.text, 'html.parser')\n",
" \n",
" tbls=sp_ura.findAll('table', class_='table-data')\n",
" #tbls[0] - spot prices\n",
" ths=[th.text for th in tbls[0].findAll('th')]\n",
" ths[0]='Month'\n",
" tds=[[td.text for td in tr.findAll('td')] for tr in tbls[0].findAll('tr') ]\n",
" pdfUra=pd.DataFrame(list(filter(None, tds)), columns=ths).set_index('Month')\n",
" pdfUra=pdfUra.stack().reset_index()\n",
" pdfUra['Date']=pdfUra.apply(lambda x: '{0}-{1}'.format(x['Month'].capitalize(), x['level_1']), axis=1)\n",
" pdfUra.rename(columns={0:'U3O8'}, inplace=True)\n",
" pdfU=FuelsDataFrame(pdfUra[['Date', 'U3O8']].set_index('Date')).replace({'-':np.nan}).dropna()\n",
" #print(pdfU)\n",
" return pdfU.sort_index_date(inplace=False)\n",
"\n",
" #print(list(filter(None, tds)))\n",
" #print(len(tbls))\n",
"\n",
"uranium=get_uranium()\n",
"uranium.to_sql('URANIUM', eng_co, if_exists='upsert')\n",
"print(uranium.tail(5))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Natural gas\n",
"\n",
"### Henry hub - https://www.eia.gov/dnav/ng/hist/rngwhhdm.htm"
]
},
{
"cell_type": "code",
"execution_count": 241,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" NG_HH\n",
"Date \n",
"Jan-1997 3.45\n",
"Feb-1997 2.15\n",
"Mar-1997 1.89\n",
"Apr-1997 2.03\n",
"May-1997 2.25\n"
]
}
],
"source": [
"strHenryHubURL=r'https://www.eia.gov/dnav/ng/hist/rngwhhdm.htm'\n",
"\n",
"def get_henry_nub(strHHURL=strHenryHubURL):\n",
" def convert2float(strDigit):\n",
" try:\n",
" return float(strDigit)\n",
" except:\n",
" return np.nan\n",
" \n",
" sess=requests.session()\n",
" hh=sess.get(strHHURL)\n",
" hh.encoding=hh.apparent_encoding\n",
" sp_hh=BeautifulSoup(hh.text, 'html.parser')\n",
" \n",
" tbl=sp_hh.find('th', text='Year').parent.parent\n",
" lstCols=[h.text for h in tbl.findAll('th')]\n",
" lstCels=list(filter(None, [[convert2float(td.text) for td in r.findAll('td')] for r in tbl.findAll('tr')]))\n",
" pdf=pd.DataFrame(lstCels, columns=lstCols).dropna(how='all').set_index('Year').unstack().reset_index()\n",
" pdf.rename(columns={0:'NG_HH', 'level_0':'month'}, inplace=True)\n",
" pdf['Date']=pdf.apply(lambda x: '{}-{}'.format(x['month'].lower(), int(x['Year'])), axis=1)\n",
" pdf.set_index('Date', inplace=True)\n",
" pdf.index=pdf.index.str.capitalize()\n",
" return FuelsDataFrame(pdf[['NG_HH']]).sort_index_date(inplace=False).dropna()\n",
" \n",
"henry_hub=get_henry_nub()\n",
"print(henry_hub.head(5))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### NG - YChart and WorldBank - 'for Europe'\n",
"https://ycharts.com/indicators/europe_natural_gas_price"
]
},
{
"cell_type": "code",
"execution_count": 242,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" NG_EUROPE\n",
"Date \n",
"Aug-2017 5.28\n",
"Sep-2017 5.51\n",
"Oct-2017 6.08\n",
"Nov-2017 6.02\n",
"Dec-2017 6.56\n",
"Jan-2018 7.56\n",
"Feb-2018 6.87\n",
"Mar-2018 7.03\n",
"Apr-2018 7.81\n",
"May-2018 7.19\n"
]
}
],
"source": [
"strCalcGas=r'https://ycharts.com/indicators/europe_natural_gas_price'\n",
"\n",
"def get_ychart_gas(strURL=strCalcGas):\n",
" def correct_date(strDT):\n",
" lst=strDT.split(' ')\n",
" lst[0]=lst[0][:3]\n",
" return ' '.join(lst)\n",
" \n",
" sess=requests.session()\n",
" eg=sess.get(strURL)\n",
" eg.encoding=eg.apparent_encoding\n",
" sp_eg=BeautifulSoup(eg.text, 'html.parser')\n",
" \n",
" tbls=sp_eg.findAll('table', class_='histDataTable')\n",
" \n",
" trs=list(chain.from_iterable([t.findAll('tr') for t in tbls])) \n",
" #lstVals=list(filter(None, [[td.text.strip() for td in tr.findAll('td')] for tr in tbls[0].findAll('tr')]))\n",
" lstVals=list(filter(None, [[td.text.strip() for td in tr.findAll('td')] for tr in trs]))\n",
" pdf=pd.DataFrame(lstVals, columns=['Date', 'NG_EUROPE'])\n",
" pdf['Date']=pdf['Date'].apply(lambda x: dt.strptime(correct_date(x), '%b %d, %Y').strftime('%b-%Y').lower())\n",
" return FuelsDataFrame(pdf.set_index('Date'))\n",
"\n",
"def get_WB_prices():\n",
" pdfWBGas=read_wb_prices().filter(regex='GAS').dropna(axis=0, how='all')\n",
" pdfWBG=FuelsDataFrame(pdfWBGas)\n",
" pdfWBG.index_to_string()\n",
" pdfWBG.index=pdfWBG.index.str.lower()\n",
" return pdfWBG\n",
"\n",
"ychart_gas=get_ychart_gas()\n",
"wb_gas=get_WB_prices()['NGAS_EUR']\n",
"wb_gas.update(ychart_gas['NG_EUROPE'])\n",
"wb_gas.dropna(inplace=True)\n",
"wb_gas.name='NG_EUROPE'\n",
"europe_gas=FuelsDataFrame(wb_gas)\n",
"europe_gas.index=europe_gas.index.str.capitalize()\n",
"print(europe_gas.tail(10))\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 245,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" NG_HH NG_EUROPE\n",
"Date \n",
"Dec-2017 2.82 6.56\n",
"Jan-2018 3.87 7.56\n",
"Feb-2018 2.67 6.87\n",
"Mar-2018 2.69 7.03\n",
"Apr-2018 2.80 7.81\n",
"May-2018 2.80 7.19\n",
"Jun-2018 2.97 NaN\n",
"Jul-2018 2.83 NaN\n",
"Aug-2018 2.96 NaN\n",
"Sep-2018 3.00 NaN\n"
]
}
],
"source": [
"nat_gas=pd.merge(henry_hub, europe_gas, how='outer', left_index=True, right_index=True).sort_index_date(inplace=False)\n",
"nat_gas.to_sql('NATURAL_GAS', eng_co, if_exists='upsert')\n",
"print(nat_gas.tail(10))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## LNG\n",
"https://bluegoldresearch.com/global-lng-prices - https://bluegoldresearch.com/datadownload?chnid=410"
]
},
{
"cell_type": "code",
"execution_count": 232,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Japan China UK Spain Brazil\n",
"Date \n",
"Dec-2017 10.0 9.9 8.0 9.3 9.4\n",
"Jan-2018 10.9 10.9 7.3 8.0 10.3\n",
"Feb-2018 9.5 9.4 7.0 7.2 9.4\n",
"Mar-2018 7.9 7.8 6.9 7.2 7.7\n",
"Apr-2018 7.2 7.1 6.7 6.9 7.1\n",
"May-2018 8.4 8.4 7.4 7.9 7.9\n",
"Jun-2018 10.3 10.2 7.4 8.1 9.8\n",
"Jul-2018 10.2 10.2 7.5 8.2 9.8\n",
"Aug-2018 10.4 10.4 7.9 8.6 10.0\n",
"Sep-2018 11.6 11.6 9.5 9.9 11.4\n"
]
}
],
"source": [
"strLNG_Prices=r'https://bluegoldresearch.com/datadownload?chnid=410'\n",
"\n",
"pdLNG=FuelsDataFrame(pd.read_csv(strLNG_Prices, index_col='Date', parse_dates=True)).rename(columns={'United Kingdom':'UK'})\n",
"pdLNG.index_to_string()\n",
"pdLNG.to_sql('LNG', eng_co, if_exists='upsert')\n",
"print(pdLNG.tail(10))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Petroleum and Diesel"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Russia, Ukraine, Belarussia, Moldova\n",
"https://www.benzin-price.ru/price.php"
]
},
{
"cell_type": "code",
"execution_count": 248,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<RequestsCookieJar[]>\n",
"https://www.benzin-price.ru\n",
"{'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/69.0.3497.81 Chrome/69.0.3497.81 Safari/537.36', 'accept-encoding': 'gzip, deflate, br', 'accept': 'text/plain, text/html', 'Connection': 'keep-alive', 'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7', 'cache-control': 'max-age=0'}\n",
"<RequestsCookieJar[<Cookie ck=f24ec981d09f103016154725b0bc9174 for .benzin-price.ru/>, <Cookie sn=2c6f8c967d45b5a81d753fdd215fba1f for .benzin-price.ru/>, <Cookie PHPSESSID=2c6f8c967d45b5a81d753fdd215fba1f for www.benzin-price.ru/>]>\n",
"None\n"
]
}
],
"source": [
"lStartYear=2008\n",
"\n",
"#strPetrolBaseURL=r'https://www.benzin-price.ru/stat_month.php?month={month}&year={year}&region_id={region}'\n",
"strPetrolBaseURL=r'http://www.benzin-price.ru/stat_month.php?month={month}&year={year}&region_id={region}'\n",
"strPetroMainURL=r'https://www.benzin-price.ru'\n",
"\n",
"strPetrol2=r'http://prices.gasoil.media/fuel/dynamics/?fuel=Ai_92&period=month&subject=213'\n",
"\n",
"req_headers={'accept': 'text/plain, text/html',\n",
"'accept-encoding': 'gzip, deflate, br',\n",
"'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',\n",
"'cache-control': 'max-age=0',\n",
"'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/69.0.3497.81 Chrome/69.0.3497.81 Safari/537.36'}\n",
"\n",
"\n",
"\n",
"def get_regions(strURL=strPetroMainURL):\n",
" sessPet=requests.session()\n",
" print(sessPet.cookies)\n",
" sessPet.headers.update(req_headers)\n",
" \n",
" htmlPet=sessPet.get(strURL)\n",
" print(strURL)\n",
" print(sessPet.headers)\n",
" print(sessPet.cookies)\n",
" htmlPet.encoding=htmlPet.apparent_encoding\n",
" \n",
" soupPet=BeautifulSoup(htmlPet.text, 'html.parser')\n",
"\n",
" #h3b=soupPet.find('h3', string='Выберите регион')\n",
" #print(h3b.text)\n",
" return sessPet, soupPet\n",
"\n",
"sessionPetrol, soupPet=get_regions()\n",
"print(soupPet.title)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0\n"
]
}
],
"source": [
"a_regs=soupPet.findAll('a', {'href':re.compile(r'/price.php\\?region_id')})\n",
"\n",
"dctRegionCodes={re.search('(\\d+)$', a['href']).group(0):re.match('\\D+', a.text).group(0) for a in a_regs}\n",
"\n",
"pdfRegCodes=pd.Series(dctRegionCodes)\n",
"pdfRegCodes.name='RegionName'\n",
"pdfRegCodes.index.name='Code'\n",
"#pdfRegCodes.to_sql('regcodes_digest', eng_co, if_exists='replace')\n",
"print(len(pdfRegCodes))"
]
},
{
"cell_type": "code",
"execution_count": 268,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"new IP is 65.19.167.130\n",
"start read for 9 month; 2011 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 185.24.218.174\n",
" parsed (13,)\n",
"new IP is 185.220.101.6\n",
"start read for 10 month; 2016 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 91.219.236.171\n",
"error reading 2 - sleep and wait 60 sec... new IP is 197.231.221.211\n",
"error reading 3 - sleep and wait 90 sec... new IP is 109.248.9.8\n",
"error reading 4 - sleep and wait 120 sec... new IP is 92.63.103.241\n",
"error reading 5 - sleep and wait 150 sec... new IP is 185.220.101.33\n",
"fatal error reading!!!\n",
"80 24.525000\n",
"92 26.980000\n",
"92+ NaN\n",
"95 28.925000\n",
"95+ 30.055000\n",
"98 31.386667\n",
"98+ 32.937500\n",
"100 NaN\n",
"100+ NaN\n",
"ДТ 26.367500\n",
"ДТ+ 27.282500\n",
"Газ 16.575000\n",
"region 77.000000\n",
"Name: Sep.2011, dtype: float64\n"
]
}
],
"source": [
"from stem import Signal\n",
"from stem.control import Controller\n",
"from time import sleep\n",
"import socket\n",
"import socks\n",
"\n",
"cur_month=9\n",
"cur_region=77\n",
"cur_year=2018\n",
"\n",
"#socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, \"127.0.0.1\", 9050, True)\n",
"#socket.socket = socks.socksocket\n",
"#controller=Controller.from_port(port=9051, address='127.0.0.1')\n",
"\n",
"lstUA=[r'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0',\n",
"r'Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0', \n",
" r'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36 OPR/38.0.2220.41',\n",
" r'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',\n",
" r'Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0)',\n",
" r'Googlebot/2.1 (+http://www.google.com/bot.html)'\n",
" ]\n",
"\n",
"strPetrolBaseURL1=r'https://www.benzin-price.ru/stat_month.php?month={month}&year={year}&region_id={region}'\n",
"strPetroMainURL1=r'https://www.benzin-price.ru'\n",
"\n",
"def cnange_tor_proxy():\n",
" #socket.setdefaulttimeout(5)\n",
" \n",
" with Controller.from_port(address='127.0.0.1', port=9051) as controller:\n",
" controller.authenticate(password='cmasf')\n",
" controller.signal(Signal.NEWNYM)\n",
" sleep(10)\n",
" session=requests.session()\n",
" hdrs=req_headers\n",
" hdrs['user-agent']=lstUA[4]\n",
" session.headers=hdrs\n",
" session.proxies={'http': 'socks5://127.0.0.1:9050',\n",
" 'https': 'socks5://127.0.0.1:9050' }\n",
" r=session.get('http://icanhazip.com/')\n",
" print('new IP is', r.text.strip())\n",
" return session\n",
"\n",
"\n",
"def get_month_prices(current_month=10, current_year=2018, current_region=77, session=sessionPetrol):\n",
" def make_soup():\n",
" #print(strPetrolBaseURL.format(month=current_month, region=current_region, year=current_year))\n",
" #print(session.headers)\n",
" #session.cookies.clear()\n",
" #hdrs=req_headers\n",
" #hdrs['user-agent']=lstUA[3]\n",
" #r=session.get(strPetroMainURL1)\n",
" #sleep(10)\n",
" #if len(r.text)!=0:\n",
" # print('response ', r.status_code)\n",
" # print('headers ', r.headers)\n",
" # print('cookies ', r.cookies)\n",
" \n",
" #print(r, session.cookies)\n",
" #sleep(5)\n",
" htmlPet=session.get(strPetrolBaseURL1.format(month=current_month, region=current_region, year=current_year))\n",
" #print('response ', htmlPet.status_code)\n",
" #print('headers ', htmlPet.headers)\n",
" #print('cookies ', htmlPet.cookies)\n",
" htmlPet.encoding=htmlPet.apparent_encoding\n",
" return BeautifulSoup(htmlPet.text, 'html.parser')\n",
" \n",
" num_trying=0\n",
"\n",
" lstHeaders=[]\n",
" lstPrices=[]\n",
" \n",
" print('start read for {0} month; {1} year; {2} region'.format(current_month, current_year, current_region), end=' ... ')\n",
" \n",
" #print(session.headers)\n",
" \n",
" while num_trying<6:\n",
" soupPetPrice=make_soup()\n",
" try:\n",
" td_work=soupPetPrice.find('td', class_='content')\n",
" tblWorks=td_work.findAll('table')\n",
" lstHeaders=[th.text for th in tblWorks[0].findAll('th')]\n",
" lstPrices=list(filter(None, [[td.text.strip() for td in tr.findAll('td')] for tr in tblWorks[0].findAll('tr')]))\n",
" break\n",
" except:\n",
" if num_trying==5:\n",
" print('fatal error reading!!!')\n",
" return None\n",
" num_trying+=1\n",
" sec_wait=30*num_trying\n",
" print('error reading {0} - sleep and wait {1} sec... '.format(num_trying, sec_wait), end=' ')\n",
" sleep(sec_wait)\n",
" session=cnange_tor_proxy()\n",
" \n",
" pdfMonth=pd.DataFrame(lstPrices, columns=lstHeaders).set_index('период')\n",
" pdfMonth=pdfMonth.applymap(lambda x: float(x) if x !='' else np.nan)\n",
" pdfMonth=pdfMonth.mean(axis=0)\n",
" pdfMonth.name=date(current_year, current_month, 1).strftime('%b.%Y')\n",
" pdfMonth['region']=current_region\n",
" print(' parsed ', pdfMonth.shape)\n",
" sleep(15)\n",
" return pdfMonth\n",
"\n",
"#print(get_month_prices())\n",
"\n",
"tmp1=get_month_prices(current_month=9, current_year=2011, current_region=77, session=cnange_tor_proxy())\n",
"tmp2=get_month_prices(current_month=10, current_year=2016, current_region=77, session=cnange_tor_proxy())\n",
"\n",
"#cnange_tor_proxy()\n",
"print(tmp1)"
]
},
{
"cell_type": "code",
"execution_count": 237,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" RegionName\n",
"Code \n",
"77 г. Москва\n",
"50 Московская область и новая Москва\n",
"78 г. Санкт-Петербург\n",
"47 Ленинградская область\n",
"22 Алтайский край\n",
"28 Амурская область\n",
"29 Архангельская область\n",
"30 Астраханская область\n",
"31 Белгородская область\n",
"32 Брянская область\n",
"['77', '50', '78', '47', '22', '28', '29', '30', '31', '32', '33', '34', '35', '36', '79', '75', '37', '38', '7', '39', '40', '41', '42', '43', '44', '23', '24', '45', '46', '48', '49', '51', '83', '52', '53', '54', '55', '56', '57', '58', '59', '25', '60', '1', '4', '2', '3', '5', '6', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '19', '61', '62', '63', '64', '65', '66', '67', '26', '68', '69', '70', '71', '72', '18', '73', '27', '86', '74', '95', '21', '87', '89', '76', '2040', '2043', '1010', '1020', '1030', '1040', '1050', '1060', '1070', '2030', '2005', '2007', '2012', '2014', '2018', '2021', '2023', '2026', '2032', '2035', '2009', '2046', '2048', '2051', '2053', '2019', '2059', '2061', '2063', '2065', '2068', '2071', '2074', '2077', '3001', '3002']\n"
]
}
],
"source": [
"#print(pd.concat([tmp1, tmp2], axis=1).T)\n",
"#tmpErr=get_month_prices(current_month=1, current_year=lStartYear, current_region=77)\n",
"#print(tmpErr)\n",
"\n",
"pdfRegCodes=pd.read_sql('regcodes_digest', con=eng_co, index_col='Code')\n",
"print(pdfRegCodes.head(10))\n",
"print(pdfRegCodes.index.tolist())\n",
"\n",
"#for r in pdfRegCodes.loc[pdfRegCodes.index.str.len()< 3].index.tolist():\n",
"# print(r)"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"new IP is 185.220.101.28\n",
"start read for 1 month; 2008 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 176.10.99.200\n",
"error reading 2 - sleep and wait 60 sec... new IP is 104.223.123.98\n",
"error reading 3 - sleep and wait 90 sec... new IP is 197.231.221.211\n",
"error reading 4 - sleep and wait 120 sec... new IP is 185.227.68.78\n",
"error reading 5 - sleep and wait 150 sec... new IP is 37.187.129.166\n",
"fatal error reading!!!\n",
"start read for 2 month; 2008 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 37.187.129.166\n",
"error reading 2 - sleep and wait 60 sec... new IP is 195.176.3.19\n",
"error reading 3 - sleep and wait 90 sec... new IP is 46.38.235.14\n",
"error reading 4 - sleep and wait 120 sec... new IP is 79.137.79.167\n",
"error reading 5 - sleep and wait 150 sec... new IP is 144.217.80.80\n",
"fatal error reading!!!\n",
"start read for 3 month; 2008 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 192.42.116.13\n",
"error reading 2 - sleep and wait 60 sec... new IP is 163.172.67.180\n",
"error reading 3 - sleep and wait 90 sec... new IP is 81.17.27.137\n",
"error reading 4 - sleep and wait 120 sec... new IP is 178.17.166.147\n",
"error reading 5 - sleep and wait 150 sec... new IP is 51.15.65.25\n",
"fatal error reading!!!\n",
"start read for 4 month; 2008 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 185.107.47.215\n",
"error reading 2 - sleep and wait 60 sec... new IP is 142.44.232.99\n",
"error reading 3 - sleep and wait 90 sec... new IP is 185.107.47.215\n",
"error reading 4 - sleep and wait 120 sec... new IP is 192.42.116.16\n",
"error reading 5 - sleep and wait 150 sec... new IP is 85.248.227.165\n",
"fatal error reading!!!\n",
"start read for 5 month; 2008 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 199.195.250.77\n",
"error reading 2 - sleep and wait 60 sec... new IP is 185.220.101.5\n",
"error reading 3 - sleep and wait 90 sec... new IP is 185.100.87.206\n",
"error reading 4 - sleep and wait 120 sec... new IP is 213.61.215.54\n",
"error reading 5 - sleep and wait 150 sec... new IP is 192.42.116.20\n",
"fatal error reading!!!\n",
"start read for 6 month; 2008 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 79.137.68.85\n",
"error reading 2 - sleep and wait 60 sec... new IP is 144.217.161.119\n",
"error reading 3 - sleep and wait 90 sec... new IP is 185.220.101.33\n",
"error reading 4 - sleep and wait 120 sec... new IP is 185.165.168.168\n",
"error reading 5 - sleep and wait 150 sec... new IP is 163.172.41.228\n",
"fatal error reading!!!\n",
"start read for 7 month; 2008 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 185.220.101.7\n",
"error reading 2 - sleep and wait 60 sec... new IP is 195.135.194.134\n",
"error reading 3 - sleep and wait 90 sec... new IP is 185.220.102.7\n",
"error reading 4 - sleep and wait 120 sec... new IP is 146.0.42.41\n",
"error reading 5 - sleep and wait 150 sec... new IP is 104.223.123.98\n",
"fatal error reading!!!\n",
"start read for 8 month; 2008 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 81.17.27.136\n",
"error reading 2 - sleep and wait 60 sec... new IP is 65.19.167.130\n",
"error reading 3 - sleep and wait 90 sec... new IP is 192.42.116.16\n",
"error reading 4 - sleep and wait 120 sec... new IP is 176.10.104.240\n",
"error reading 5 - sleep and wait 150 sec... new IP is 185.220.101.32\n",
"fatal error reading!!!\n",
"start read for 9 month; 2008 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 167.99.42.89\n",
"error reading 2 - sleep and wait 60 sec... new IP is 62.210.37.82\n",
"error reading 3 - sleep and wait 90 sec... new IP is 158.69.37.14\n",
"error reading 4 - sleep and wait 120 sec... new IP is 185.220.102.7\n",
"error reading 5 - sleep and wait 150 sec... new IP is 185.193.125.190\n",
"fatal error reading!!!\n",
"start read for 10 month; 2008 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 18.85.22.239\n",
"error reading 2 - sleep and wait 60 sec... new IP is 185.220.102.8\n",
"error reading 3 - sleep and wait 90 sec... new IP is 197.231.221.211\n",
"error reading 4 - sleep and wait 120 sec... new IP is 23.129.64.103\n",
"error reading 5 - sleep and wait 150 sec... new IP is 162.247.74.206\n",
"fatal error reading!!!\n",
"start read for 11 month; 2008 year; 77 region ... error reading 1 - sleep and wait 30 sec... new IP is 162.247.74.202\n",
"error reading 2 - sleep and wait 60 sec... new IP is 141.255.162.38\n",
"error reading 3 - sleep and wait 90 sec... new IP is 176.126.252.11\n",
"error reading 4 - sleep and wait 120 sec... "
]
},
{
"ename": "ConnectionError",
"evalue": "SOCKSHTTPConnectionPool(host='icanhazip.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.contrib.socks.SOCKSConnection object at 0x7faed251a588>: Failed to establish a new connection: 0x01: General SOCKS server failure',))",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-96-2f7ad932ebce>\u001b[0m in \u001b[0;36mget_month_prices\u001b[0;34m(current_month, current_year, current_region, session)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0mtd_work\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msoupPetPrice\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'td'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclass_\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'content'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 51\u001b[0;31m \u001b[0mtblWorks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtd_work\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfindAll\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'table'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 52\u001b[0m \u001b[0mlstHeaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mth\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mth\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtblWorks\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfindAll\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'th'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'findAll'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mGeneralProxyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/urllib3/contrib/socks.py\u001b[0m in \u001b[0;36m_new_conn\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 88\u001b[0;31m \u001b[0;34m**\u001b[0m\u001b[0mextra_kw\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 89\u001b[0m )\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/socks.py\u001b[0m in \u001b[0;36mcreate_connection\u001b[0;34m(dest_pair, timeout, source_address, proxy_type, proxy_addr, proxy_port, proxy_rdns, proxy_username, proxy_password, socket_options)\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 262\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 263\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/socks.py\u001b[0m in \u001b[0;36mcreate_connection\u001b[0;34m(dest_pair, timeout, source_address, proxy_type, proxy_addr, proxy_port, proxy_rdns, proxy_username, proxy_password, socket_options)\u001b[0m\n\u001b[1;32m 251\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 252\u001b[0;31m \u001b[0msock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mremote_host\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mremote_port\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 253\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0msock\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/socks.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetblocking\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 101\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/socks.py\u001b[0m in \u001b[0;36mconnect\u001b[0;34m(self, dest_pair)\u001b[0m\n\u001b[1;32m 854\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 855\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mGeneralProxyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Socket error\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 856\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mProxyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mGeneralProxyError\u001b[0m: Socket error: 0x01: General SOCKS server failure",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mNewConnectionError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 599\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 600\u001b[0;31m chunked=chunked)\n\u001b[0m\u001b[1;32m 601\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 353\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 354\u001b[0;31m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mhttplib_request_kw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 355\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1238\u001b[0m \u001b[0;34m\"\"\"Send a complete request to the server.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1239\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1240\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_send_request\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1284\u001b[0m \u001b[0mbody\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'body'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1285\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mendheaders\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1286\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mendheaders\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1233\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mCannotSendHeader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1234\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage_body\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1235\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_send_output\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1025\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1026\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1027\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 963\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_open\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 964\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 965\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/urllib3/connection.py\u001b[0m in \u001b[0;36mconnect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mconnect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 196\u001b[0;31m \u001b[0mconn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_new_conn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 197\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare_conn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/urllib3/contrib/socks.py\u001b[0m in \u001b[0;36m_new_conn\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 110\u001b[0;31m \u001b[0;34m\"Failed to establish a new connection: %s\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 111\u001b[0m )\n",
"\u001b[0;31mNewConnectionError\u001b[0m: <urllib3.contrib.socks.SOCKSConnection object at 0x7faed251a588>: Failed to establish a new connection: 0x01: General SOCKS server failure",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mMaxRetryError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 444\u001b[0m \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 445\u001b[0;31m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 446\u001b[0m )\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 637\u001b[0m retries = retries.increment(method, url, error=e, _pool=self,\n\u001b[0;32m--> 638\u001b[0;31m _stacktrace=sys.exc_info()[2])\n\u001b[0m\u001b[1;32m 639\u001b[0m \u001b[0mretries\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/urllib3/util/retry.py\u001b[0m in \u001b[0;36mincrement\u001b[0;34m(self, method, url, response, error, _pool, _stacktrace)\u001b[0m\n\u001b[1;32m 397\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnew_retry\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_exhausted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 398\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mMaxRetryError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_pool\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merror\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mResponseError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcause\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 399\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mMaxRetryError\u001b[0m: SOCKSHTTPConnectionPool(host='icanhazip.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.contrib.socks.SOCKSConnection object at 0x7faed251a588>: Failed to establish a new connection: 0x01: General SOCKS server failure',))",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mConnectionError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-99-f416e234662d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mpdfPetrolPrices\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpdfRegCodes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mpdfRegCodes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0;34m'77'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mpdfPetrolPrices\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mget_month_prices\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcurrent_region\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcurrent_month\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcurrent_year\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mSessPet\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0my\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlStartYear\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2019\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mm\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m13\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0;31m#lst=[(r[0], m, y) for y in range(lStartYear, 2019) for m in range(1, 13) ]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;31m#print(lst)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-99-f416e234662d>\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mpdfPetrolPrices\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpdfRegCodes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mpdfRegCodes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0;34m'77'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mpdfPetrolPrices\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mget_month_prices\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcurrent_region\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcurrent_month\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcurrent_year\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mSessPet\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0my\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlStartYear\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2019\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mm\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m13\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0;31m#lst=[(r[0], m, y) for y in range(lStartYear, 2019) for m in range(1, 13) ]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;31m#print(lst)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-96-2f7ad932ebce>\u001b[0m in \u001b[0;36mget_month_prices\u001b[0;34m(current_month, current_year, current_region, session)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'error reading {0} - sleep and wait {1} sec... '\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnum_trying\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msec_wait\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m' '\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msec_wait\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0msession\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcnange_tor_proxy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0mpdfMonth\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlstPrices\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlstHeaders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'период'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-96-2f7ad932ebce>\u001b[0m in \u001b[0;36mcnange_tor_proxy\u001b[0;34m()\u001b[0m\n\u001b[1;32m 25\u001b[0m session.proxies={'http': 'socks5://127.0.0.1:9050',\n\u001b[1;32m 26\u001b[0m 'https': 'socks5://127.0.0.1:9050' }\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0mr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'http://icanhazip.com/'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 28\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'new IP is'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, url, **kwargs)\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'allow_redirects'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 525\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'GET'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 526\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0moptions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 510\u001b[0m }\n\u001b[1;32m 511\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 512\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 513\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 514\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 620\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 621\u001b[0m \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 622\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 623\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 624\u001b[0m \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 511\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mSSLError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 512\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 513\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 514\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 515\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mClosedPoolError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mConnectionError\u001b[0m: SOCKSHTTPConnectionPool(host='icanhazip.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.contrib.socks.SOCKSConnection object at 0x7faed251a588>: Failed to establish a new connection: 0x01: General SOCKS server failure',))"
]
}
],
"source": [
"#print(pdfRegCodes.loc[pdfRegCodes.index.str.len()<3])\n",
"#for r in pdfRegCodes.loc[pdfRegCodes.index.str.len()<3].iteritems():\n",
"\n",
"SessPet=cnange_tor_proxy()\n",
"pdfPetrolPrices=None\n",
"for r in pdfRegCodes.loc[pdfRegCodes.index=='77'].index.tolist():\n",
" pdfPetrolPrices=pd.concat([get_month_prices(current_region=int(r), current_month=m, current_year=y, session=SessPet) for y in range(lStartYear, 2019) for m in range(1, 13) ], axis=1)\n",
" #lst=[(r[0], m, y) for y in range(lStartYear, 2019) for m in range(1, 13) ]\n",
" #print(lst)\n",
" #for y in range(lStartYear, 2019):\n",
" # for m in range(1, 13):\n",
" # print(r[0], m, y)\n",
"\n",
"print(pdfPetrolPrices)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"petrol=FuelsDataFrame(pdfPetrolPrices)\n",
"petrol.index.name='Date'\n",
"petrol=petrol.sort_index_date(inplace=False)\n",
"\n",
"petrol.to_sql('PETROL_RU', eng_co, if_exists='upsert')\n",
"print(petrol.tail(5))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment