Skip to content

Instantly share code, notes, and snippets.

@kokes
Created September 27, 2015 11:27
Show Gist options
  • Save kokes/cc3c98ba7c7f403380b1 to your computer and use it in GitHub Desktop.
Save kokes/cc3c98ba7c7f403380b1 to your computer and use it in GitHub Desktop.
Strukturální fondy EU, probraný v Pythonu
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Strukturální fondy"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from pyquery import PyQuery as pq\n",
"import pandas as pd\n",
"pd.set_option('precision', 2)\n",
"import numpy as np\n",
"import urllib\n",
"\n",
"import PyPDF2\n",
"from StringIO import StringIO\n",
"import re\n",
"\n",
"import sys\n",
"reload(sys) # Reload does the trick!\n",
"sys.setdefaultencoding('UTF8')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Seznam vládních institucí"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"MFČR je má [na webu](http://www.mfcr.cz/cs/verejny-sektor/hospodareni/rozpoctove-ramce-statisticke-informace/verejny-sektor/sektor-vladnich-instituci)."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"mfcr_url = 'http://www.mfcr.cz/assets/cs/media/Rozp-ramce-EU-85-2011_2015_Seznam-vladnich-instituci-v-CR_v01.xlsx'\n",
"mfcr_fn, headers = urllib.urlretrieve(mfcr_url)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[u'Sektor vl\\xe1dn\\xedch instituc\\xed-S.13',\n",
" u'\\u010c\\xedseln\\xedk - pr\\xe1vn\\xedch forem']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mfcr_xls = pd.ExcelFile(mfcr_fn)\n",
"mfcr_xls.sheet_names"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ico</th>\n",
" <th>nazev</th>\n",
" <th>nace</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>25217968</td>\n",
" <td>Františkolázeňská výtopna, s.r.o.</td>\n",
" <td>6820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>25399039</td>\n",
" <td>OBEC-INVEST, s.r.o.</td>\n",
" <td>68310</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>25700898</td>\n",
" <td>Domov Dřevčice, spol. s r.o.</td>\n",
" <td>6820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>26076357</td>\n",
" <td>Strakonická televize, s.r.o.</td>\n",
" <td>60200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>27577708</td>\n",
" <td>Divadla Kladno s.r.o.</td>\n",
" <td>90020</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ico nazev nace\n",
"0 25217968 Františkolázeňská výtopna, s.r.o. 6820\n",
"1 25399039 OBEC-INVEST, s.r.o. 68310\n",
"2 25700898 Domov Dřevčice, spol. s r.o. 6820\n",
"3 26076357 Strakonická televize, s.r.o. 60200\n",
"4 27577708 Divadla Kladno s.r.o. 90020"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stfir = mfcr_xls.parse(mfcr_xls.sheet_names[0], skiprows=range(3))\n",
"stfir = stfir.iloc[:,(1,2,5)]\n",
"stfir.columns = 'ico, nazev, nace'.split(', ')\n",
"#stfir.ico = stfir.ico.astype('string')\n",
"stfir.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Neni tam budvar."
]
},
{
"cell_type": "code",
"execution_count": 156,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ico</th>\n",
" <th>nazev</th>\n",
" <th>nace</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>25125877</td>\n",
" <td>BALMED Praha, státní podnik</td>\n",
" <td>68202</td>\n",
" </tr>\n",
" <tr>\n",
" <th>459</th>\n",
" <td>71377999</td>\n",
" <td>Agentura pro podporu podnikání a investic Czec...</td>\n",
" <td>70220</td>\n",
" </tr>\n",
" <tr>\n",
" <th>695</th>\n",
" <td>100340</td>\n",
" <td>Střední odborná škola a Střední odborné učiliš...</td>\n",
" <td>85322</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1010</th>\n",
" <td>575933</td>\n",
" <td>Střední škola služeb a podnikání, Ostrava-Poru...</td>\n",
" <td>85321</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1981</th>\n",
" <td>47008539</td>\n",
" <td>Městský bytový podnik Kralupy nad Vltavou</td>\n",
" <td>6820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2775</th>\n",
" <td>49629077</td>\n",
" <td>Střední odborné učiliště gastronomie a podnikání</td>\n",
" <td>85321</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2918</th>\n",
" <td>60075953</td>\n",
" <td>Střední škola obchodu, služeb a podnikání a Vy...</td>\n",
" <td>85322</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2988</th>\n",
" <td>60153687</td>\n",
" <td>Bytový podnik Vrchlabí, příspěvková organizace</td>\n",
" <td>6820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4115</th>\n",
" <td>63731371</td>\n",
" <td>Střední škola automobilní, mechanizace a podni...</td>\n",
" <td>85322</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4289</th>\n",
" <td>64669033</td>\n",
" <td>Bytový podnik města Železného Brodu</td>\n",
" <td>6820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5934</th>\n",
" <td>70947066</td>\n",
" <td>Městský bytový podnik Karolinka, příspěvková o...</td>\n",
" <td>6820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8210</th>\n",
" <td>72053666</td>\n",
" <td>Karlovarská agentura rozvoje podnikání, příspě...</td>\n",
" <td>69200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8225</th>\n",
" <td>72068272</td>\n",
" <td>Městský kulturní podnik - FIDIKO Žamberk</td>\n",
" <td>90040</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ico nazev nace\n",
"40 25125877 BALMED Praha, státní podnik 68202\n",
"459 71377999 Agentura pro podporu podnikání a investic Czec... 70220\n",
"695 100340 Střední odborná škola a Střední odborné učiliš... 85322\n",
"1010 575933 Střední škola služeb a podnikání, Ostrava-Poru... 85321\n",
"1981 47008539 Městský bytový podnik Kralupy nad Vltavou 6820\n",
"2775 49629077 Střední odborné učiliště gastronomie a podnikání 85321\n",
"2918 60075953 Střední škola obchodu, služeb a podnikání a Vy... 85322\n",
"2988 60153687 Bytový podnik Vrchlabí, příspěvková organizace 6820\n",
"4115 63731371 Střední škola automobilní, mechanizace a podni... 85322\n",
"4289 64669033 Bytový podnik města Železného Brodu 6820\n",
"5934 70947066 Městský bytový podnik Karolinka, příspěvková o... 6820\n",
"8210 72053666 Karlovarská agentura rozvoje podnikání, příspě... 69200\n",
"8225 72068272 Městský kulturní podnik - FIDIKO Žamberk 90040"
]
},
"execution_count": 156,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stfir[stfir.nazev.str.contains('podnik')]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# doplnime nuly do IČ, který kvůli konverzi na int přišly o nuly na začátku\n",
"# for ind, ico in zip(stfir.index, stfir.ico):\n",
"# if len(ico) == 8: continue\n",
"# stfir.loc[ind,'ico'] = '0'*(8-len(ico)) + ico"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Přehled dotací\n",
"Aktuální spreadsheet je na [adrese MMR](http://www.strukturalni-fondy.cz/cs/Informace-o-cerpani/Seznamy-prijemcu). Jelikož se každý měsíc aktualizuje, budeme muset najít aktuální odkaz."
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"mmr_url = 'http://www.strukturalni-fondy.cz/cs/Informace-o-cerpani/Seznamy-prijemcu'\n",
"mmr = pq(mmr_url)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"seznam = mmr.find(\"ul#p_lt_zoneContent_VypisSouboru1_rTree2_ctl00_item_tree a\").attr('href')\n",
"szn_fn, headers = urllib.urlretrieve('http://www.strukturalni-fondy.cz' + seznam)"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>firma</th>\n",
" <th>ico</th>\n",
" <th>popis</th>\n",
" <th>program</th>\n",
" <th>fond</th>\n",
" <th>datum_alokace</th>\n",
" <th>castka_alok</th>\n",
" <th>datum_platby</th>\n",
" <th>castka_propl</th>\n",
" <th>stav</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>\" A L L E G R O \" s.r.o.</td>\n",
" <td>48951862</td>\n",
" <td>Inovace výroby interiérových kovových\\nstavebn...</td>\n",
" <td>OP Podnikání a inovace</td>\n",
" <td>ERDF</td>\n",
" <td>19.10.2009</td>\n",
" <td>14875000</td>\n",
" <td>02.11.2012</td>\n",
" <td>14304447</td>\n",
" <td>Finalized</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>\" A L L E G R O \" s.r.o.</td>\n",
" <td>48951862</td>\n",
" <td>Podpora exportu firmy Allegro s.r.o. na\\nSlove...</td>\n",
" <td>OP Podnikání a inovace</td>\n",
" <td>ERDF</td>\n",
" <td>08.10.2008</td>\n",
" <td>1615000</td>\n",
" <td>30.09.2009</td>\n",
" <td>1164770</td>\n",
" <td>Finalized</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>\" A L L E G R O \" s.r.o.</td>\n",
" <td>48951862</td>\n",
" <td>Realizace výroby kovových stropních podhledů</td>\n",
" <td>OP Podnikání a inovace</td>\n",
" <td>ERDF</td>\n",
" <td>21.03.2008</td>\n",
" <td>14869900</td>\n",
" <td>23.09.2008</td>\n",
" <td>14869900</td>\n",
" <td>Finalized</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>\" A L L E G R O \" s.r.o.</td>\n",
" <td>48951862</td>\n",
" <td>Rekonstrukce objektu v Bratkovicích</td>\n",
" <td>OP Podnikání a inovace</td>\n",
" <td>ERDF</td>\n",
" <td>07.09.2010</td>\n",
" <td>11059350</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>Cancelled</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>\" IZOS s.r.o. \"</td>\n",
" <td>47285338</td>\n",
" <td>CONECO Bratislava</td>\n",
" <td>OP Podnikání a inovace</td>\n",
" <td>ERDF</td>\n",
" <td>02.09.2010</td>\n",
" <td>381650</td>\n",
" <td>01.09.2011</td>\n",
" <td>285450</td>\n",
" <td>Finalized</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" firma ico \\\n",
"0 \" A L L E G R O \" s.r.o. 48951862 \n",
"1 \" A L L E G R O \" s.r.o. 48951862 \n",
"2 \" A L L E G R O \" s.r.o. 48951862 \n",
"3 \" A L L E G R O \" s.r.o. 48951862 \n",
"4 \" IZOS s.r.o. \" 47285338 \n",
"\n",
" popis program \\\n",
"0 Inovace výroby interiérových kovových\\nstavebn... OP Podnikání a inovace \n",
"1 Podpora exportu firmy Allegro s.r.o. na\\nSlove... OP Podnikání a inovace \n",
"2 Realizace výroby kovových stropních podhledů OP Podnikání a inovace \n",
"3 Rekonstrukce objektu v Bratkovicích OP Podnikání a inovace \n",
"4 CONECO Bratislava OP Podnikání a inovace \n",
"\n",
" fond datum_alokace castka_alok datum_platby castka_propl stav \n",
"0 ERDF 19.10.2009 14875000 02.11.2012 14304447 Finalized \n",
"1 ERDF 08.10.2008 1615000 30.09.2009 1164770 Finalized \n",
"2 ERDF 21.03.2008 14869900 23.09.2008 14869900 Finalized \n",
"3 ERDF 07.09.2010 11059350 NaN 0 Cancelled \n",
"4 ERDF 02.09.2010 381650 01.09.2011 285450 Finalized "
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dot = pd.read_excel(szn_fn, skiprows=range(8))\n",
"dot.columns = 'firma, ico, popis, program, fond, datum_alokace, castka_alok, datum_platby, castka_propl, stav'.split(', ')\n",
"\n",
"dot.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(65780, 10)"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.shape(dot)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Finalized 44718\n",
"Ongoing 18332\n",
"Cancelled 2729\n",
"dtype: int64"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dot.stav.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Finalized 44718\n",
"Ongoing 18332\n",
"dtype: int64"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dot=dot[dot.stav.isin(['Finalized', 'Ongoing'])]\n",
"dot.stav.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"OP Životní prostředí 16818\n",
"OP Podnikání a inovace 11976\n",
"OP Vzdělávání pro\\nkonkurenceschopnost 10592\n",
"Integrovaný operační program 8491\n",
"OP Lidské zdroje a zaměstnanost 5404\n",
"OP Rybářství 1182\n",
"ROP NUTS II Střední Morava 1172\n",
"ROP NUTS II Jihozápad 1117\n",
"ROP NUTS II Střední Čechy 1030\n",
"OP Praha Adaptabilita 1021\n",
"ROP NUTS II Moravskoslezsko 996\n",
"ROP NUTS II Jihovýchod 925\n",
"ROP NUTS II Severovýchod 720\n",
"ROP NUTS II Severozápad 490\n",
"OP Praha Konkurenceschopnost 352\n",
"OP Technická pomoc 328\n",
"OP Doprava 244\n",
"OP Výzkum a vývoj pro inovace 192\n",
"dtype: int64"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dot.program.value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Doprava má málo projektů, ale hodně peněz. TODO: peněz na průměrnej projekt"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"program\n",
"Integrovaný operační program 43.0\n",
"OP Doprava 152.5\n",
"OP Lidské zdroje a zaměstnanost 58.8\n",
"OP Podnikání a inovace 93.6\n",
"OP Praha Adaptabilita 3.0\n",
"OP Praha Konkurenceschopnost 7.0\n",
"OP Rybářství 0.7\n",
"OP Technická pomoc 4.2\n",
"OP Vzdělávání pro\\nkonkurenceschopnost 46.3\n",
"OP Výzkum a vývoj pro inovace 50.8\n",
"OP Životní prostředí 118.3\n",
"ROP NUTS II Jihovýchod 22.5\n",
"ROP NUTS II Jihozápad 18.9\n",
"ROP NUTS II Moravskoslezsko 19.4\n",
"ROP NUTS II Severovýchod 17.7\n",
"ROP NUTS II Severozápad 18.2\n",
"ROP NUTS II Střední Morava 18.0\n",
"ROP NUTS II Střední Čechy 17.5\n",
"Name: castka_alok, dtype: float64"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dot.groupby('program').sum().loc[:,'castka_alok']/10**9"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Státní podniky"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Veřejný firmy berou tolik:"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"500.91038528135999"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dot.loc[dot.ico.isin(stfir.ico),'castka_alok'].sum()/10**9"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Z celkovýho poolu asi 700 mld."
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"710.27491644792008"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dot.castka_alok.sum()/10**9"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Of michael kors, chybej krajema a mestama zrizeny podniky - dopravni podniky, nemocnice."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Velký ryby"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Chce to seřadit podle sumy částek."
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>castka_alok</th>\n",
" <th>castka_propl</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ico</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4.3e+08</td>\n",
" <td>4.3e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>205</th>\n",
" <td>3.1e+07</td>\n",
" <td>1.8e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1171</th>\n",
" <td>8.5e+08</td>\n",
" <td>6.9e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1350</th>\n",
" <td>2.9e+06</td>\n",
" <td>2.6e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1481</th>\n",
" <td>2.5e+07</td>\n",
" <td>6.5e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1490</th>\n",
" <td>6.3e+06</td>\n",
" <td>1.4e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2739</th>\n",
" <td>1.7e+09</td>\n",
" <td>1.2e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5886</th>\n",
" <td>9.7e+09</td>\n",
" <td>1.8e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6033</th>\n",
" <td>4.5e+06</td>\n",
" <td>2.0e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6599</th>\n",
" <td>1.5e+08</td>\n",
" <td>1.1e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6947</th>\n",
" <td>9.3e+08</td>\n",
" <td>7.2e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6963</th>\n",
" <td>4.1e+08</td>\n",
" <td>2.8e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7064</th>\n",
" <td>8.1e+09</td>\n",
" <td>3.1e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7536</th>\n",
" <td>1.5e+09</td>\n",
" <td>8.2e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8648</th>\n",
" <td>4.8e+07</td>\n",
" <td>2.9e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8702</th>\n",
" <td>1.9e+08</td>\n",
" <td>1.5e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8753</th>\n",
" <td>1.1e+07</td>\n",
" <td>3.6e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9971</th>\n",
" <td>3.8e+07</td>\n",
" <td>2.5e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10235</th>\n",
" <td>4.1e+06</td>\n",
" <td>4.1e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10367</th>\n",
" <td>1.1e+07</td>\n",
" <td>4.7e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10545</th>\n",
" <td>1.0e+07</td>\n",
" <td>9.9e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10944</th>\n",
" <td>3.3e+07</td>\n",
" <td>1.9e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11835</th>\n",
" <td>1.5e+08</td>\n",
" <td>1.1e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12114</th>\n",
" <td>3.0e+06</td>\n",
" <td>2.1e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12122</th>\n",
" <td>4.0e+07</td>\n",
" <td>1.4e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12131</th>\n",
" <td>2.5e+07</td>\n",
" <td>1.8e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12190</th>\n",
" <td>5.7e+07</td>\n",
" <td>1.3e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12556</th>\n",
" <td>1.6e+06</td>\n",
" <td>5.5e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12645</th>\n",
" <td>1.2e+07</td>\n",
" <td>3.8e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13251</th>\n",
" <td>4.8e+06</td>\n",
" <td>4.0e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76168051</th>\n",
" <td>1.3e+06</td>\n",
" <td>1.0e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76222071</th>\n",
" <td>1.6e+06</td>\n",
" <td>1.6e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76614174</th>\n",
" <td>3.8e+06</td>\n",
" <td>3.8e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76626270</th>\n",
" <td>1.0e+05</td>\n",
" <td>1.0e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86594265</th>\n",
" <td>3.9e+05</td>\n",
" <td>3.9e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86652036</th>\n",
" <td>8.5e+06</td>\n",
" <td>8.4e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86726889</th>\n",
" <td>4.3e+05</td>\n",
" <td>4.3e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86770713</th>\n",
" <td>1.0e+07</td>\n",
" <td>8.7e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86796968</th>\n",
" <td>1.2e+06</td>\n",
" <td>1.2e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86797042</th>\n",
" <td>7.1e+05</td>\n",
" <td>7.1e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86870971</th>\n",
" <td>8.5e+04</td>\n",
" <td>8.5e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86891332</th>\n",
" <td>8.5e+04</td>\n",
" <td>8.5e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86952480</th>\n",
" <td>3.5e+05</td>\n",
" <td>3.4e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86956931</th>\n",
" <td>4.7e+06</td>\n",
" <td>4.7e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87074079</th>\n",
" <td>2.8e+06</td>\n",
" <td>2.8e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87149257</th>\n",
" <td>3.0e+07</td>\n",
" <td>3.0e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87157624</th>\n",
" <td>1.7e+05</td>\n",
" <td>1.6e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87192993</th>\n",
" <td>2.1e+05</td>\n",
" <td>2.1e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87592827</th>\n",
" <td>8.6e+05</td>\n",
" <td>6.8e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87847795</th>\n",
" <td>2.1e+06</td>\n",
" <td>2.1e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87964911</th>\n",
" <td>1.9e+06</td>\n",
" <td>1.9e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88067807</th>\n",
" <td>1.3e+06</td>\n",
" <td>1.3e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88068331</th>\n",
" <td>5.4e+06</td>\n",
" <td>5.4e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88124304</th>\n",
" <td>1.3e+06</td>\n",
" <td>1.3e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88387402</th>\n",
" <td>1.9e+06</td>\n",
" <td>1.3e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88701221</th>\n",
" <td>8.5e+05</td>\n",
" <td>5.1e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88751287</th>\n",
" <td>3.3e+06</td>\n",
" <td>3.3e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88799387</th>\n",
" <td>4.0e+06</td>\n",
" <td>3.8e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88845907</th>\n",
" <td>6.2e+06</td>\n",
" <td>5.7e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88893472</th>\n",
" <td>8.3e+04</td>\n",
" <td>0.0e+00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>24348 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" castka_alok castka_propl\n",
"ico \n",
"0 4.3e+08 4.3e+08\n",
"205 3.1e+07 1.8e+07\n",
"1171 8.5e+08 6.9e+08\n",
"1350 2.9e+06 2.6e+06\n",
"1481 2.5e+07 6.5e+06\n",
"1490 6.3e+06 1.4e+05\n",
"2739 1.7e+09 1.2e+09\n",
"5886 9.7e+09 1.8e+09\n",
"6033 4.5e+06 2.0e+06\n",
"6599 1.5e+08 1.1e+08\n",
"6947 9.3e+08 7.2e+08\n",
"6963 4.1e+08 2.8e+08\n",
"7064 8.1e+09 3.1e+09\n",
"7536 1.5e+09 8.2e+08\n",
"8648 4.8e+07 2.9e+07\n",
"8702 1.9e+08 1.5e+08\n",
"8753 1.1e+07 3.6e+06\n",
"9971 3.8e+07 2.5e+07\n",
"10235 4.1e+06 4.1e+06\n",
"10367 1.1e+07 4.7e+06\n",
"10545 1.0e+07 9.9e+06\n",
"10944 3.3e+07 1.9e+07\n",
"11835 1.5e+08 1.1e+08\n",
"12114 3.0e+06 2.1e+06\n",
"12122 4.0e+07 1.4e+07\n",
"12131 2.5e+07 1.8e+07\n",
"12190 5.7e+07 1.3e+07\n",
"12556 1.6e+06 5.5e+05\n",
"12645 1.2e+07 3.8e+06\n",
"13251 4.8e+06 4.0e+06\n",
"... ... ...\n",
"76168051 1.3e+06 1.0e+06\n",
"76222071 1.6e+06 1.6e+06\n",
"76614174 3.8e+06 3.8e+06\n",
"76626270 1.0e+05 1.0e+05\n",
"86594265 3.9e+05 3.9e+05\n",
"86652036 8.5e+06 8.4e+06\n",
"86726889 4.3e+05 4.3e+05\n",
"86770713 1.0e+07 8.7e+06\n",
"86796968 1.2e+06 1.2e+06\n",
"86797042 7.1e+05 7.1e+05\n",
"86870971 8.5e+04 8.5e+04\n",
"86891332 8.5e+04 8.5e+04\n",
"86952480 3.5e+05 3.4e+05\n",
"86956931 4.7e+06 4.7e+06\n",
"87074079 2.8e+06 2.8e+06\n",
"87149257 3.0e+07 3.0e+07\n",
"87157624 1.7e+05 1.6e+05\n",
"87192993 2.1e+05 2.1e+05\n",
"87592827 8.6e+05 6.8e+05\n",
"87847795 2.1e+06 2.1e+06\n",
"87964911 1.9e+06 1.9e+06\n",
"88067807 1.3e+06 1.3e+06\n",
"88068331 5.4e+06 5.4e+06\n",
"88124304 1.3e+06 1.3e+06\n",
"88387402 1.9e+06 1.3e+06\n",
"88701221 8.5e+05 5.1e+05\n",
"88751287 3.3e+06 3.3e+06\n",
"88799387 4.0e+06 3.8e+06\n",
"88845907 6.2e+06 5.7e+06\n",
"88893472 8.3e+04 0.0e+00\n",
"\n",
"[24348 rows x 2 columns]"
]
},
"execution_count": 129,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dot.groupby('ico').sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Agrohrátky\n",
"Ze stránky Agrofertu jsme vzali [výroční zprávu](http://www.agrofert.cz/?288/vyrocni-zpravy) pro rok 2012. Z toho dostanem IČO kódy."
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def getPDFContent(path, pages=[]):\n",
" content = \"\"\n",
" p = file(path, \"rb\")\n",
" pdf = PyPDF2.PdfFileReader(p)\n",
" if pages:\n",
" for i in pages:\n",
" content += pdf.getPage(i).extractText() + \"\\n\"\n",
" else:\n",
" numPages = pdf.getNumPages()\n",
" for i in range(numPages):\n",
" content += pdf.getPage(i).extractText() + \"\\n\"\n",
" content = \" \".join(content.replace(u\"\\xa0\", \" \").strip().split())\n",
" return content"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ag_url = 'http://www.agrofert.cz/f/?4235/vyrocni-zprava-2012'\n",
"\n",
"ag_fn, header = urllib.urlretrieve(ag_url)"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"vz = getPDFContent(ag_fn)"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#vz = vz.encode('ascii', 'ignore')\n",
"m = re.findall(r\"([0-9]{8})\", vz)\n",
"\n",
"agroico = [int(j) for j in np.unique(m)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ani ne 2 mld pro Andrejka"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"1.649"
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"round(dot.loc[dot.ico.isin(agroico),'castka_alok'].sum()/10**9, 3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Na 120 projektech"
]
},
{
"cell_type": "code",
"execution_count": 158,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(120,)"
]
},
"execution_count": 158,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.shape(dot.loc[dot.ico.isin(agroico),'castka_alok'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment