Skip to content

Instantly share code, notes, and snippets.

@valgur
Last active November 18, 2017 13:57
Show Gist options
  • Save valgur/d9d0d822f7e262cf53919ffc0f70ec31 to your computer and use it in GitHub Desktop.
Save valgur/d9d0d822f7e262cf53919ffc0f70ec31 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import geopandas as gpd\n",
"from shapely.geometry import box, Polygon\n",
"import numpy as np\n",
"\n",
"# read the CSV and strip whitespace\n",
"df = pd.read_table(\"avalik_1.csv\").apply(lambda col: col.str.strip())\n",
"df[df == \"\"] = np.nan\n",
"\n",
"# convert the provided L-EST coordinate ranges to box-shaped Shapely polygons\n",
"lest_x = df.Lest_X.str.split('-', expand=True).apply(pd.to_numeric)\n",
"lest_y = df.Lest_Y.str.split('-', expand=True).apply(pd.to_numeric)\n",
"geoms = [box(*bounds) if np.isfinite(sum(bounds)) else Polygon() \n",
" for bounds in zip(lest_y[0], lest_x[0], lest_y[1] + 1, lest_x[1] + 1)]\n",
"\n",
"# create a GeoDataFrame\n",
"gdf = gpd.GeoDataFrame(df.copy(), geometry=geoms, crs={'init': 'epsg:3301'})\n",
"\n",
"# convert L-EST to WGS84 (lon-lat)\n",
"gdf_wgs84 = gdf.to_crs({'init': 'epsg:4326'})"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(24.69091202365922, 59.39886684603926),\n",
" (28.1677274320991, 59.37952049461464),\n",
" (24.77102873925808, 59.438817647171305),\n",
" (24.744302358568977, 59.42550620159956),\n",
" (24.82443562319098, 59.46093630388677),\n",
" (24.682200718054673, 59.40340073053982),\n",
" (24.85925003676478, 59.442760037279065),\n",
" (24.753307768701816, 59.4344318064427),\n",
" (24.72668712977187, 59.425604623551266),\n",
" (24.79704276042106, 59.42070896500355)]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coords = [(p.x, p.y) if not p.is_empty else (None, None) for p in gdf_wgs84.centroid]\n",
"coords[:10]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>JuhtumId</th>\n",
" <th>ToimKpv</th>\n",
" <th>ToimKell</th>\n",
" <th>ToimNadalapaev</th>\n",
" <th>SyndmusLiik</th>\n",
" <th>SyndmusTaiendavStatLiik</th>\n",
" <th>Seadus</th>\n",
" <th>Paragrahv</th>\n",
" <th>ParagrahvTais</th>\n",
" <th>Loige</th>\n",
" <th>Kahjusumma</th>\n",
" <th>KohtLiik</th>\n",
" <th>MaakondNimetus</th>\n",
" <th>ValdLinnNimetus</th>\n",
" <th>KohtNimetus</th>\n",
" <th>SyyteoLiik</th>\n",
" <th>lon</th>\n",
" <th>lat</th>\n",
" <th>KoordTapsus</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>59203c96-2c27-18d5-8fb4-854b247a4c84</td>\n",
" <td>2017-11-14</td>\n",
" <td>14:15</td>\n",
" <td>Teisipäev</td>\n",
" <td>MUU</td>\n",
" <td>NaN</td>\n",
" <td>Karistusseadustik</td>\n",
" <td>§ 218.</td>\n",
" <td>§ 218. Varavastane süütegu väheväärtusliku asj...</td>\n",
" <td>lg. 1.</td>\n",
" <td>NaN</td>\n",
" <td>AVALIK_KOHT,KAUPLUS</td>\n",
" <td>Harju maakond</td>\n",
" <td>Tallinn</td>\n",
" <td>Mustamäe linnaosa</td>\n",
" <td>VT</td>\n",
" <td>24.690912</td>\n",
" <td>59.398867</td>\n",
" <td>500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>59203c64-2c27-18d5-8fb4-854b247a4c84</td>\n",
" <td>2017-11-14</td>\n",
" <td>14:00</td>\n",
" <td>Teisipäev</td>\n",
" <td>AVALIKU_KORRA_RIKKUMINE,KEHALINE_VAARKOHTLEMINE</td>\n",
" <td>NaN</td>\n",
" <td>Karistusseadustik</td>\n",
" <td>§ 263.</td>\n",
" <td>§ 263. Avaliku korra raske rikkumine</td>\n",
" <td>lg. 1.</td>\n",
" <td>NaN</td>\n",
" <td>TANAV_VALJAK</td>\n",
" <td>Ida-Viru maakond</td>\n",
" <td>Narva linn</td>\n",
" <td>Narva linn</td>\n",
" <td>KT</td>\n",
" <td>28.167727</td>\n",
" <td>59.379520</td>\n",
" <td>500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>59203c3c-2c27-18d5-8fb4-854b247a4c84</td>\n",
" <td>2017-11-14</td>\n",
" <td>12:35</td>\n",
" <td>Teisipäev</td>\n",
" <td>VARGUS</td>\n",
" <td>MUU_VARGUS</td>\n",
" <td>Karistusseadustik</td>\n",
" <td>§ 199.</td>\n",
" <td>§ 199. Vargus</td>\n",
" <td>lg. 2.</td>\n",
" <td>NaN</td>\n",
" <td>AVALIK_KOHT,KAUPLUS</td>\n",
" <td>Harju maakond</td>\n",
" <td>Tallinn</td>\n",
" <td>Kesklinna linnaosa</td>\n",
" <td>KT</td>\n",
" <td>24.771029</td>\n",
" <td>59.438818</td>\n",
" <td>500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>59203c00-2c27-18d5-8fb4-854b247a4c84</td>\n",
" <td>2017-11-13</td>\n",
" <td>20:22</td>\n",
" <td>Esmaspäev</td>\n",
" <td>VANDALISM</td>\n",
" <td>NaN</td>\n",
" <td>Karistusseadustik</td>\n",
" <td>§ 218.</td>\n",
" <td>§ 218. Varavastane süütegu väheväärtusliku asj...</td>\n",
" <td>lg. 1.</td>\n",
" <td>0-499</td>\n",
" <td>AVALIK_KOHT,TEENINDUSETTEVOTE</td>\n",
" <td>Harju maakond</td>\n",
" <td>Tallinn</td>\n",
" <td>Kesklinna linnaosa</td>\n",
" <td>VT</td>\n",
" <td>24.744302</td>\n",
" <td>59.425506</td>\n",
" <td>500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>59203be2-2c27-18d5-8fb4-854b247a4c84</td>\n",
" <td>2017-11-13</td>\n",
" <td>18:40</td>\n",
" <td>Esmaspäev</td>\n",
" <td>PISIVARGUS</td>\n",
" <td>NaN</td>\n",
" <td>Karistusseadustik</td>\n",
" <td>§ 218.</td>\n",
" <td>§ 218. Varavastane süütegu väheväärtusliku asj...</td>\n",
" <td>lg. 1.</td>\n",
" <td>0-499</td>\n",
" <td>AVALIK_KOHT,KAUPLUS</td>\n",
" <td>Harju maakond</td>\n",
" <td>Tallinn</td>\n",
" <td>Pirita linnaosa</td>\n",
" <td>VT</td>\n",
" <td>24.824436</td>\n",
" <td>59.460936</td>\n",
" <td>500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" JuhtumId ToimKpv ToimKell ToimNadalapaev \\\n",
"0 59203c96-2c27-18d5-8fb4-854b247a4c84 2017-11-14 14:15 Teisipäev \n",
"1 59203c64-2c27-18d5-8fb4-854b247a4c84 2017-11-14 14:00 Teisipäev \n",
"2 59203c3c-2c27-18d5-8fb4-854b247a4c84 2017-11-14 12:35 Teisipäev \n",
"3 59203c00-2c27-18d5-8fb4-854b247a4c84 2017-11-13 20:22 Esmaspäev \n",
"4 59203be2-2c27-18d5-8fb4-854b247a4c84 2017-11-13 18:40 Esmaspäev \n",
"\n",
" SyndmusLiik SyndmusTaiendavStatLiik \\\n",
"0 MUU NaN \n",
"1 AVALIKU_KORRA_RIKKUMINE,KEHALINE_VAARKOHTLEMINE NaN \n",
"2 VARGUS MUU_VARGUS \n",
"3 VANDALISM NaN \n",
"4 PISIVARGUS NaN \n",
"\n",
" Seadus Paragrahv \\\n",
"0 Karistusseadustik § 218. \n",
"1 Karistusseadustik § 263. \n",
"2 Karistusseadustik § 199. \n",
"3 Karistusseadustik § 218. \n",
"4 Karistusseadustik § 218. \n",
"\n",
" ParagrahvTais Loige Kahjusumma \\\n",
"0 § 218. Varavastane süütegu väheväärtusliku asj... lg. 1. NaN \n",
"1 § 263. Avaliku korra raske rikkumine lg. 1. NaN \n",
"2 § 199. Vargus lg. 2. NaN \n",
"3 § 218. Varavastane süütegu väheväärtusliku asj... lg. 1. 0-499 \n",
"4 § 218. Varavastane süütegu väheväärtusliku asj... lg. 1. 0-499 \n",
"\n",
" KohtLiik MaakondNimetus ValdLinnNimetus \\\n",
"0 AVALIK_KOHT,KAUPLUS Harju maakond Tallinn \n",
"1 TANAV_VALJAK Ida-Viru maakond Narva linn \n",
"2 AVALIK_KOHT,KAUPLUS Harju maakond Tallinn \n",
"3 AVALIK_KOHT,TEENINDUSETTEVOTE Harju maakond Tallinn \n",
"4 AVALIK_KOHT,KAUPLUS Harju maakond Tallinn \n",
"\n",
" KohtNimetus SyyteoLiik lon lat KoordTapsus \n",
"0 Mustamäe linnaosa VT 24.690912 59.398867 500 \n",
"1 Narva linn KT 28.167727 59.379520 500 \n",
"2 Kesklinna linnaosa KT 24.771029 59.438818 500 \n",
"3 Kesklinna linnaosa VT 24.744302 59.425506 500 \n",
"4 Pirita linnaosa VT 24.824436 59.460936 500 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"modified_df = pd.concat([\n",
" df.drop(['Lest_X', 'Lest_Y'], axis=1), \n",
" pd.DataFrame(coords, columns=['lon', 'lat']),\n",
" pd.Series(np.sqrt(gdf.area), name='KoordTapsus').astype(int)\n",
"], axis=1)\n",
"modified_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"modified_df.to_csv('avalik_1_lonlat.tsv', index=False, sep='\\t', float_format='%.5f')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also save it in GeoJSON or any other vector data format you prefer so it can be easily viewed in some GIS software, such as QGIS."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"gdf_wgs84[~gdf_wgs84.is_empty].to_file('avalik_1.geojson', driver='GeoJSON')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment