buildings_import_osm_fortaleza.ipynb
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/kauevestena/85f8e22eb5510cc7abaf521163f9661b/buildings_import_osm_fortaleza.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "I_wbXKuOExJ-",
"outputId": "6c94e200-81b2-412e-f762-aefba9c177ae"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
]
}
],
"source": [
"# connectting to drive first\n",
"from google.colab import drive\n",
"drive.mount('/content/gdrive')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "__OOizznGZB-"
},
"outputs": [],
"source": [
"##########\n",
"########## SETUP\n",
"##########\n",
"\n",
"buildings_folderpath = '/content/gdrive/MyDrive/OSM_building_imports/building_parts_data'\n",
"parcels_folderpath = '/content/gdrive/MyDrive/OSM_building_imports/parcels_data'\n",
"\n",
"basepath = '/content/gdrive/MyDrive/OSM_building_imports/'\n",
"\n",
"proj_CRS = \"EPSG:31984\"\n",
"\n",
"latlon_CRS = 'EPSG:4326'\n",
"\n",
"cols_to_delete = ['fid','Bairro','']\n",
"\n",
"TOLERANCE = 0.12 # 12 centimeters"
]
},
{
"cell_type": "markdown",
"source": [],
"metadata": {
"id": "ESPAxoB-EsMp"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TaR1A9XWK8my"
},
"outputs": [],
"source": [
"# renaming rules:\n",
"\n",
"renamings = {'building':'building:part', 'pmfsefin:i':'pmfsefin:idedif', 'building:l':'building:levels'}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BUbF2SDjwn75"
},
"outputs": [],
"source": [
"# package installation\n",
"!pip install geopandas > install_log.txt #workaround to avoid all that big verbose output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "kLRUUzEfTAsP"
},
"outputs": [],
"source": [
"!pip install pygeos > install_log2.txt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "9Mx-dFTBqbJA"
},
"outputs": [],
"source": [
"# imports\n",
"import os, json\n",
"import geopandas as gpd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "iqGCxJHgXKPD"
},
"outputs": [],
"source": [
"!pip freeze > installed_packages.txt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YvTZJ5tHyEgX"
},
"outputs": [],
"source": [
"# setting output folder: \n",
"\n",
"outputfolder_path = os.path.join(basepath,'output')\n",
"\n",
"if not os.path.exists(outputfolder_path):\n",
" os.makedirs(outputfolder_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "m0v8Z7a6JoVd"
},
"outputs": [],
"source": [
"# get the file list for each:\n",
"building_filelist = [filename for filename in os.listdir(buildings_folderpath)]\n",
"parcels_filelist = [filename for filename in os.listdir(parcels_folderpath)]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "MtEAWBk_qpIL"
},
"outputs": [],
"source": [
"if len(building_filelist) != len(parcels_filelist):\n",
" raise Exception('They must have the same size!!')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "UkGKF6NdrKmO",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "dad15716-8284-427f-f768-0c5a747ed83f"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"00044 {'buildings_path': '/content/gdrive/MyDrive/OSM_building_imports/building_parts_data/buildings_00044.gpkg', 'parcels_path': '/content/gdrive/MyDrive/OSM_building_imports/parcels_data/parcels_00044.gpkg'}\n"
]
}
],
"source": [
"# creating the dictionary wich holds the between parcels and buildings:\n",
"file_records = {}\n",
"\n",
"for entry in building_filelist:\n",
" id = entry.split('.')[0].split('_')[-1]\n",
" file_records[id] = {}\n",
"\n",
" file_records[id]['buildings_path'] = os.path.join(buildings_folderpath,entry)\n",
"\n",
"for i,entry in enumerate(parcels_filelist):\n",
" id = entry.split('.')[0].split('_')[-1]\n",
" file_records[id]['parcels_path'] = os.path.join(parcels_folderpath,entry)\n",
"\n",
"\n",
"for key in file_records:\n",
" print(key,file_records[key])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oCWjLmWbN_Gm"
},
"outputs": [],
"source": [
"def single_feature_to_gdf(input_geometry,crs=proj_CRS):\n",
" return gpd.GeoDataFrame({'id':[1],'geometry':[input_geometry]},crs=proj_CRS)\n",
"\n",
"def remove_not_really_inside(input_gdf,inputpolygon):\n",
"\n",
" single_polygon_gdf = single_feature_to_gdf(inputpolygon)\n",
"\n",
" in_gdf = input_gdf.copy() # it solves the warning of \"SettingWithCopyWarning\"\n",
"\n",
" in_gdf.loc[:,'orig_area'] = in_gdf.loc[:,'geometry'].area\n",
"\n",
"\n",
" clipped = in_gdf.overlay(single_polygon_gdf,how='intersection',keep_geom_type=True)\n",
"\n",
"\n",
" clipped.loc[:,'intersection_area'] = clipped.geometry.area\n",
"\n",
" clipped.loc[:,'areas_ratio'] = clipped['intersection_area'] / clipped['orig_area']\n",
"\n",
"\n",
" # selecting the remaining:\n",
"\n",
" selected_ids = clipped[clipped['areas_ratio'] > 0.5].set_index('pmfsefin:idedif').index\n",
"\n",
" result = in_gdf.set_index('pmfsefin:idedif').loc[selected_ids,:].reset_index().drop(columns=['orig_area'])\n",
"\n",
"\n",
" # selected_features = clipped['areas_ratio'] > 0.5\n",
"\n",
"\n",
" # # return clipped[clipped['areas_ratio'] > 0.5].drop(columns=['orig_area','intersection_area','areas_ratio'])\n",
" # result = in_gdf.reset_index().drop(columns=['index'])[selected_features]\n",
"\n",
" \n",
" if not result.empty:\n",
" return result,max(result['height']),max(result['building:levels'])\n",
" else:\n",
" return result,None,None\n",
"\n",
"def dump_json(inputdict,outputpath):\n",
" with open(outputpath,'w+') as json_handle:\n",
" json.dump(inputdict,json_handle,indent=4,ensure_ascii=False,sort_keys=True)\n",
"\n",
"def read_json(inputpath):\n",
" with open(inputpath) as reader:\n",
" data = reader.read()\n",
"\n",
" return json.loads(data)\n",
"\n",
"def merge_geojsons(input_pathlist,outputpath,delete_inputs=True):\n",
" '''\n",
" simple function to merge geojsons without using any library.\n",
" Same CRS for all files is assumed.\n",
" '''\n",
"\n",
" ref_dict = None\n",
"\n",
"\n",
" for i,path in enumerate(input_pathlist):\n",
"\n",
"\n",
" if i == 0:\n",
" ref_dict = read_json(path)\n",
"\n",
"\n",
" else:\n",
" ref_dict['features'] += read_json(path)['features']\n",
"\n",
"\n",
" dump_json(ref_dict,outputpath)\n",
"\n",
" if delete_inputs:\n",
" for inputpath in input_pathlist:\n",
" os.remove(inputpath)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XZj6MWJ2vPYl",
"outputId": "aea377aa-ca88-44de-e315-61ff8a05cbf9"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"ID: 00044\n",
"buildings: /content/gdrive/MyDrive/OSM_building_imports/building_parts_data/buildings_00044.gpkg\n",
"parcels: /content/gdrive/MyDrive/OSM_building_imports/parcels_data/parcels_00044.gpkg\n",
"Index(['building:part', 'pmfsefin:idedif', 'building:levels', 'height',\n",
" 'Bairro', 'geometry'],\n",
" dtype='object')\n",
"Bairro\n",
"Index(['building:part', 'pmfsefin:idedif', 'building:levels', 'height',\n",
" 'geometry'],\n",
" dtype='object')\n",
" parcel 0 file id: 00044\n",
" parcel 10 file id: 00044\n",
" parcel 20 file id: 00044\n",
" parcel 30 file id: 00044\n",
" parcel 40 file id: 00044\n",
" parcel 50 file id: 00044\n",
" parcel 60 file id: 00044\n",
" parcel 70 file id: 00044\n",
" parcel 80 file id: 00044\n",
" parcel 90 file id: 00044\n",
" parcel 100 file id: 00044\n",
" parcel 110 file id: 00044\n",
" parcel 120 file id: 00044\n",
" parcel 130 file id: 00044\n",
" parcel 140 file id: 00044\n",
" parcel 150 file id: 00044\n",
" parcel 160 file id: 00044\n",
" parcel 170 file id: 00044\n",
" parcel 180 file id: 00044\n",
" parcel 190 file id: 00044\n",
" parcel 200 file id: 00044\n",
" parcel 210 file id: 00044\n",
" parcel 220 file id: 00044\n",
" parcel 230 file id: 00044\n",
" parcel 240 file id: 00044\n",
" parcel 250 file id: 00044\n",
" parcel 260 file id: 00044\n",
" parcel 270 file id: 00044\n",
" parcel 280 file id: 00044\n",
" parcel 290 file id: 00044\n",
" parcel 300 file id: 00044\n",
" parcel 310 file id: 00044\n",
" parcel 320 file id: 00044\n",
" parcel 330 file id: 00044\n",
" parcel 340 file id: 00044\n",
" parcel 350 file id: 00044\n",
" parcel 360 file id: 00044\n",
" parcel 370 file id: 00044\n",
" parcel 380 file id: 00044\n",
" parcel 390 file id: 00044\n",
" parcel 400 file id: 00044\n",
" parcel 410 file id: 00044\n",
" parcel 420 file id: 00044\n",
" parcel 430 file id: 00044\n",
" parcel 440 file id: 00044\n",
" parcel 450 file id: 00044\n",
" parcel 460 file id: 00044\n",
" parcel 470 file id: 00044\n",
" parcel 480 file id: 00044\n",
" parcel 490 file id: 00044\n",
" parcel 500 file id: 00044\n",
" parcel 510 file id: 00044\n",
" parcel 520 file id: 00044\n",
" parcel 530 file id: 00044\n",
" parcel 540 file id: 00044\n",
" parcel 550 file id: 00044\n",
" parcel 560 file id: 00044\n",
" parcel 570 file id: 00044\n",
" parcel 580 file id: 00044\n",
" parcel 590 file id: 00044\n",
" parcel 600 file id: 00044\n",
" parcel 610 file id: 00044\n",
" parcel 620 file id: 00044\n",
" parcel 630 file id: 00044\n",
" parcel 640 file id: 00044\n",
" parcel 650 file id: 00044\n",
" parcel 660 file id: 00044\n",
" parcel 670 file id: 00044\n",
" parcel 680 file id: 00044\n",
" parcel 690 file id: 00044\n",
" parcel 700 file id: 00044\n",
" parcel 710 file id: 00044\n",
" parcel 720 file id: 00044\n",
" parcel 730 file id: 00044\n",
" parcel 740 file id: 00044\n",
" parcel 750 file id: 00044\n",
" parcel 760 file id: 00044\n",
" parcel 770 file id: 00044\n",
" parcel 780 file id: 00044\n",
" parcel 790 file id: 00044\n",
" parcel 800 file id: 00044\n",
" parcel 810 file id: 00044\n",
" parcel 820 file id: 00044\n",
" parcel 830 file id: 00044\n",
" parcel 840 file id: 00044\n",
" parcel 850 file id: 00044\n",
" parcel 860 file id: 00044\n",
" parcel 870 file id: 00044\n",
" parcel 880 file id: 00044\n",
" parcel 890 file id: 00044\n",
" parcel 900 file id: 00044\n",
" parcel 910 file id: 00044\n",
" parcel 920 file id: 00044\n",
" parcel 930 file id: 00044\n",
" parcel 940 file id: 00044\n",
" parcel 950 file id: 00044\n",
" parcel 960 file id: 00044\n",
" parcel 970 file id: 00044\n",
" parcel 980 file id: 00044\n",
" parcel 990 file id: 00044\n",
" parcel 1000 file id: 00044\n",
" parcel 1010 file id: 00044\n",
" parcel 1020 file id: 00044\n",
" parcel 1030 file id: 00044\n",
" parcel 1040 file id: 00044\n",
" parcel 1050 file id: 00044\n",
" parcel 1060 file id: 00044\n",
" parcel 1070 file id: 00044\n",
" parcel 1080 file id: 00044\n",
" parcel 1090 file id: 00044\n",
" parcel 1100 file id: 00044\n",
" parcel 1110 file id: 00044\n",
" parcel 1120 file id: 00044\n",
" parcel 1130 file id: 00044\n",
" parcel 1140 file id: 00044\n",
" parcel 1150 file id: 00044\n",
" parcel 1160 file id: 00044\n",
" parcel 1170 file id: 00044\n",
" parcel 1180 file id: 00044\n",
" parcel 1190 file id: 00044\n",
" parcel 1200 file id: 00044\n",
" parcel 1210 file id: 00044\n",
" parcel 1220 file id: 00044\n",
" parcel 1230 file id: 00044\n",
" parcel 1240 file id: 00044\n",
" parcel 1250 file id: 00044\n",
" parcel 1260 file id: 00044\n",
" parcel 1270 file id: 00044\n",
" parcel 1280 file id: 00044\n",
" parcel 1290 file id: 00044\n",
" parcel 1300 file id: 00044\n",
" parcel 1310 file id: 00044\n",
" parcel 1320 file id: 00044\n",
" parcel 1330 file id: 00044\n",
" parcel 1340 file id: 00044\n",
" parcel 1350 file id: 00044\n",
" parcel 1360 file id: 00044\n",
" parcel 1370 file id: 00044\n",
" parcel 1380 file id: 00044\n",
" parcel 1390 file id: 00044\n",
" parcel 1400 file id: 00044\n",
" parcel 1410 file id: 00044\n",
" parcel 1420 file id: 00044\n",
" parcel 1430 file id: 00044\n",
" parcel 1440 file id: 00044\n",
" parcel 1450 file id: 00044\n",
" parcel 1460 file id: 00044\n",
" parcel 1470 file id: 00044\n",
" parcel 1480 file id: 00044\n",
" parcel 1490 file id: 00044\n",
" parcel 1500 file id: 00044\n",
" parcel 1510 file id: 00044\n",
" parcel 1520 file id: 00044\n",
" parcel 1530 file id: 00044\n",
" parcel 1540 file id: 00044\n",
" parcel 1550 file id: 00044\n",
" parcel 1560 file id: 00044\n",
" parcel 1570 file id: 00044\n",
" parcel 1580 file id: 00044\n",
" parcel 1590 file id: 00044\n",
" parcel 1600 file id: 00044\n",
" parcel 1610 file id: 00044\n",
" parcel 1620 file id: 00044\n",
" parcel 1630 file id: 00044\n",
" parcel 1640 file id: 00044\n",
" parcel 1650 file id: 00044\n",
" parcel 1660 file id: 00044\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.8/dist-packages/geopandas/geodataframe.py:1443: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" super().__setitem__(key, value)\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
" parcel 1670 file id: 00044\n",
" parcel 1680 file id: 00044\n",
" parcel 1690 file id: 00044\n",
" parcel 1700 file id: 00044\n",
" parcel 1710 file id: 00044\n",
" parcel 1720 file id: 00044\n",
" parcel 1730 file id: 00044\n",
" parcel 1740 file id: 00044\n",
" parcel 1750 file id: 00044\n",
" parcel 1760 file id: 00044\n",
" parcel 1770 file id: 00044\n",
" parcel 1780 file id: 00044\n",
" parcel 1790 file id: 00044\n",
" parcel 1800 file id: 00044\n",
" parcel 1810 file id: 00044\n",
" parcel 1820 file id: 00044\n",
" parcel 1830 file id: 00044\n",
" parcel 1840 file id: 00044\n",
" parcel 1850 file id: 00044\n"
]
}
],
"source": [
"\n",
"# doing the actual work:\n",
"for id_key in file_records:\n",
" # storing already used building ids\n",
" inside_list = []\n",
" already_used = []\n",
"\n",
" united_polygons = {'building':[],'geometry':[],'building:levels':[],'height':[],'pmfsefin:idedif':[]}\n",
" print('ID: ',id_key)\n",
" print('buildings: ',file_records[id_key]['buildings_path'])\n",
" print('parcels: ',file_records[id_key]['parcels_path'])\n",
"\n",
"\n",
" # to remove the buildings with a single part afterwards\n",
" parts_to_remove = []\n",
"\n",
" buildings = gpd.read_file(file_records[id_key]['buildings_path'],encoding='latin1').rename(columns=renamings)\n",
" print(buildings.columns)\n",
"\n",
" if cols_to_delete:\n",
" for columname in cols_to_delete:\n",
" if columname in buildings.columns:\n",
" print(columname)\n",
" buildings.drop(columns=columname,inplace=True)\n",
"\n",
" print(buildings.columns)\n",
" \n",
"\n",
" parcels = gpd.read_file(file_records[id_key]['parcels_path'],encoding='latin1')\n",
"\n",
"\n",
"\n",
" # picking for each parcel\n",
" for i,parcel_geom in enumerate(parcels.geometry):\n",
"\n",
" try:\n",
" if i % 10 == 0:\n",
" print(' parcel ',i,' file id: ',id_key)\n",
"\n",
"\n",
" inside_df = buildings.geometry.intersects(parcel_geom.buffer(0.00001))\n",
"\n",
" if inside_df.empty:\n",
" print('empty')\n",
"\n",
" # buildings that are inside the parcel:\n",
" # selected_buildings = buildings[inside_df]\n",
"\n",
" selected_buildings,max_height,max_b_levels = remove_not_really_inside(buildings[inside_df],parcel_geom)\n",
" # print(max_height,max_b_levels)\n",
"\n",
" # unary union if is a polygon then only one building is on the parcel, if it multipolygon, there's more\n",
" unary = selected_buildings.geometry.unary_union\n",
"\n",
" if unary:\n",
"\n",
" inside_list = list(selected_buildings['pmfsefin:idedif'])\n",
"\n",
" repeated = [id for id in inside_list if id in already_used]\n",
"\n",
" if repeated:\n",
" # print(repeated,inside_list)\n",
"\n",
" inside_list = [id for id in inside_list if id not in repeated]\n",
"\n",
" if not inside_list:\n",
" continue\n",
" else:\n",
" # removing the repeated ones:\n",
" selected_buildings = selected_buildings.set_index('pmfsefin:idedif').drop(repeated).reset_index()\n",
"\n",
" if selected_buildings.empty:\n",
" continue\n",
" else:\n",
" unary = selected_buildings.geometry.unary_union\n",
"\n",
" if not unary:\n",
" continue\n",
"\n",
" \n",
" # print(inside_list)\n",
"\n",
"\n",
" g_type = unary.geom_type\n",
"\n",
" if g_type == 'Polygon':\n",
" united_polygons['building'].append('yes')\n",
" united_polygons['geometry'].append(unary)\n",
"\n",
" united_polygons['building:levels'].append(max_b_levels)\n",
" united_polygons['height'].append(max_height)\n",
"\n",
"\n",
"\n",
" if selected_buildings.shape[0] == 1:\n",
" curr_id = list(selected_buildings['pmfsefin:idedif'])[0]\n",
"\n",
" parts_to_remove.append(curr_id)\n",
" united_polygons['pmfsefin:idedif'].append(curr_id)\n",
"\n",
"\n",
" else:\n",
" united_polygons['pmfsefin:idedif'].append('')\n",
"\n",
"\n",
"\n",
"\n",
"\n",
" if g_type == \"MultiPolygon\":\n",
" united_as_gdf = single_feature_to_gdf(unary).explode(index_parts=False)\n",
"\n",
" for polygon in united_as_gdf.geometry:\n",
" united_polygons['building'].append('yes')\n",
" united_polygons['geometry'].append(polygon)\n",
"\n",
" # search for the parts inside each baseline\n",
" remaining_buildings = selected_buildings[selected_buildings.intersects(polygon)]\n",
"\n",
" if remaining_buildings.shape[0] == 1:\n",
" curr_id = list(remaining_buildings['pmfsefin:idedif'])[0]\n",
"\n",
" united_polygons['pmfsefin:idedif'].append(curr_id)\n",
" parts_to_remove.append(curr_id)\n",
"\n",
" else:\n",
" united_polygons['pmfsefin:idedif'].append('')\n",
"\n",
"\n",
" # m1 = max(remaining_buildings['height'])\n",
" # m2 = max(remaining_buildings['building:levels'])\n",
" # print(m1,m2,max(selected_buildings['height']),max(selected_buildings['building:levels']))\n",
"\n",
" united_polygons['building:levels'].append(max(remaining_buildings['building:levels']))\n",
" united_polygons['height'].append(max(remaining_buildings['height']))\n",
"\n",
" already_used += inside_list\n",
" # removing repeated entries:\n",
" already_used = list(set(already_used))\n",
" inside_list = []\n",
" \n",
"\n",
" except:\n",
" with open(os.path.join(basepath,'skipped_parcels.txt'),'a+') as report_writer:\n",
" centroid_text = parcel_geom.representative_point().wkt\n",
" report_writer.write(centroid_text+'\\n')\n",
" print('skipped parcel ',i,' centroid: ',centroid_text)\n",
"\n",
"\n",
" # removing the ones that got only one part:\n",
" # print(buildings.shape)\n",
" # print(len(parts_to_remove))\n",
"\n",
" # removing buildings that have a single part\n",
" buildings = buildings.set_index('pmfsefin:idedif').drop(parts_to_remove).reset_index()\n",
"\n",
"\n",
"\n",
" # taking care of the buildings that are oustide the parcels:\n",
" not_inside_parcels = buildings.to_crs(proj_CRS).disjoint(parcels.geometry.unary_union)\n",
" buildings_outside_parcels = buildings[not_inside_parcels].rename(columns={'building:part':'building'}).to_crs(latlon_CRS)\n",
" outpath_outside_parcels = os.path.join(outputfolder_path,f'{id_key}_outside_parcels.geojson')\n",
"\n",
" if not buildings_outside_parcels.empty:\n",
" buildings_outside_parcels.to_file(outpath_outside_parcels)\n",
"\n",
"\n",
" # checking if there's overlapping parts, throwing an error \n",
" for outside_building in buildings_outside_parcels.geometry:\n",
" overlappers = buildings_outside_parcels.overlaps(outside_building)\n",
"\n",
" if not buildings_outside_parcels[overlappers].empty:\n",
" if not overlappers.empty: \n",
" if True in overlappers:\n",
" raise Exception('There are overlapping building parts!!')\n",
"\n",
" # exporting the parts of the buildings:\n",
" buildings = buildings[~not_inside_parcels]\n",
" buildings = buildings.to_crs(latlon_CRS)\n",
" buildings['pmfsefin:idedif'] = buildings['pmfsefin:idedif'].astype(str)\n",
" outpath_parts_geojson = os.path.join(outputfolder_path,f'{id_key}_building_parts.geojson')\n",
"\n",
" if not buildings.empty:\n",
" buildings.to_file(outpath_parts_geojson)\n",
"\n",
"\n",
" # exporting the baselines of the buildings:\n",
" united_as_gdf = gpd.GeoDataFrame(united_polygons,crs=proj_CRS).to_crs(latlon_CRS)\n",
"\n",
" outpath_united_geojson = os.path.join(outputfolder_path,f'{id_key}_baselines.geojson')\n",
"\n",
" if not united_as_gdf.empty:\n",
" united_as_gdf.to_file(outpath_united_geojson)\n",
"\n",
" # now exporting all to a single file\n",
" paths_to_merge = []\n",
"\n",
" for part_path in [outpath_united_geojson,outpath_parts_geojson,outpath_outside_parcels]:\n",
" if os.path.exists(part_path):\n",
" paths_to_merge.append(part_path)\n",
"\n",
"\n",
" merged_outpath = os.path.join(outputfolder_path,f'{id_key}_exported_buildings.geojson')\n",
"\n",
" merge_geojsons(paths_to_merge,merged_outpath)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GQL03ZR35Rj7",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "bd4f4e57-f593-45d1-aa0d-30639ddaccf8"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"00044\n",
"5393 6142 0\n"
]
}
],
"source": [
"# first iterating through outtut folder:\n",
"for filename in os.listdir(outputfolder_path):\n",
"\n",
" id_key = filename.split('_')[0]\n",
" print(id_key)\n",
"\n",
" # as was originally:\n",
" b_before = gpd.read_file(file_records[id_key]['buildings_path'],encoding='latin1').rename(columns=renamings)\n",
" b_before['pmfsefin:idedif'] = b_before['pmfsefin:idedif'].astype(str)\n",
"\n",
"\n",
"\n",
" # as it has been written:\n",
" filepath = os.path.join(outputfolder_path,filename)\n",
" b_after = gpd.read_file(filepath)\n",
"\n",
" # checking if there are missing 'pmfsefin:idedif'\n",
" l_before = list(b_before['pmfsefin:idedif'])\n",
"\n",
" l_after = list(b_after['pmfsefin:idedif'])\n",
"\n",
" l_missing = []\n",
"\n",
" for id in l_before:\n",
" if id != '':\n",
" if not id in l_after:\n",
" l_missing.append(id)\n",
"\n",
"\n",
" print(len(l_before),len(l_after),len(l_missing))\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YFACQHGpAQKo"
},
"source": [
"<b style=\"color:blue;\">PART2: <br>\n",
"-------------- SPLITTING UP: <br>\n",
"\n",
" -ALREADY EXISTENT (will need a careful looking for each) <br>\n",
"\n",
" -NON-EXISTING (can be straightforwardly imported)</b>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2cxiXkJbAOP3"
},
"outputs": [],
"source": [
"# PART 2, starting setting up the second part output folder:\n",
"outputsplitted_folderpath = os.path.join(basepath,'output_splitted')\n",
"\n",
"if not os.path.exists(outputsplitted_folderpath):\n",
" os.makedirs(outputsplitted_folderpath)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xISg9U7uNSae"
},
"outputs": [],
"source": [
"!pip install osmnx > install_log3.txt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "sgK5Lki6QNTi"
},
"outputs": [],
"source": [
"import osmnx as ox\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"source": [
"## SET TOLERANCE FOR SIMPLIFYING "
],
"metadata": {
"id": "WN6FRXv_EFTP"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "LF_6_VLWNpe_"
},
"outputs": [],
"source": [
"# first iterating through outtut folder:\n",
"for filename in os.listdir(outputfolder_path):\n",
" id = filename.split('_')[0]\n",
"\n",
" filepath = os.path.join(outputfolder_path,filename)\n",
"\n",
" # reading as gdf to get its bounding box: \n",
" allbuildings = gpd.read_file(filepath)\n",
"\n",
" # doing the simplification:\n",
" allbuildings = allbuildings.to_crs(proj_CRS) #projecting\n",
"\n",
" allbuildings[\"geometry\"] = allbuildings.geometry.simplify(TOLERANCE)\n",
"\n",
" allbuildings = allbuildings.to_crs(latlon_CRS) #unprojecting\n",
"\n",
" # end of simplification part\n",
"\n",
" allbuildings['pmfsefin:idedif'] = allbuildings['pmfsefin:idedif'].replace('',np.nan)\n",
"\n",
" w,s,e,n = allbuildings.unary_union.bounds\n",
"\n",
" # THE HEAVY LIFT, THE DOWNLOAD OF OSM DATA:\n",
" already_existent = ox.geometries.geometries_from_bbox(n, s, e, w, {'building':True}).reset_index()\n",
" # creating as a single multipolygon to speed up things:\n",
" already_existent_unary_pol = already_existent.geometry.unary_union\n",
"\n",
"\n",
" # splitting buildings (baselines) from parts\n",
" building_baselines = allbuildings[allbuildings['building'] == 'yes'].drop(columns=['building:part'])\n",
"\n",
" building_parts = allbuildings[allbuildings['building:part'] == 'yes'].drop(columns=['building'])\n",
" building_parts['pmfsefin:idedif'] = building_parts['pmfsefin:idedif'].astype(int) # to avoind messing with wrong types at serialization\n",
"\n",
" baselines_with_correspondences_indexes = building_baselines.intersects(already_existent_unary_pol)\n",
"\n",
"\n",
" # splitting up to for analysis ease\n",
" baselines_with_correspondences = building_baselines[baselines_with_correspondences_indexes]\n",
" baselines_without_correspondences = building_baselines[~baselines_with_correspondences_indexes]\n",
"\n",
"\n",
" # doing an analogous thing to split parts among the ones with correspondences and without\n",
" baselines_with_correspondences_unary = baselines_with_correspondences.geometry.unary_union\n",
" parts_inside_with_correspondence_indexes = building_parts.intersects(baselines_with_correspondences_unary)\n",
"\n",
" # splitting up to for analysis ease, again\n",
" parts_with_correspondences = building_parts[parts_inside_with_correspondence_indexes]\n",
" parts_without_correspondences = building_parts[~parts_inside_with_correspondence_indexes]\n",
"\n",
" # outputting:\n",
" baselines_with_correspondences_path = os.path.join(outputsplitted_folderpath,'b_wc.geojson')\n",
" baselines_without_correspondences_path = os.path.join(outputsplitted_folderpath,'b_wo_c.geojson') \n",
" parts_with_correspondences_path = os.path.join(outputsplitted_folderpath,'p_w_c.geojson')\n",
" parts_without_correspondences_path = os.path.join(outputsplitted_folderpath,'p_wo_c.geojson')\n",
"\n",
" #outputting then merging:\n",
"\n",
" #### WITH CORRESPONDENCES\n",
" baselines_with_correspondences.to_file(baselines_with_correspondences_path)\n",
" parts_with_correspondences.to_file(parts_with_correspondences_path)\n",
"\n",
" merged_with_correspondences_path = os.path.join(outputsplitted_folderpath,f'b_{id}_with_correspondence.geojson')\n",
" merge_geojsons([baselines_with_correspondences_path,parts_with_correspondences_path],merged_with_correspondences_path)\n",
"\n",
" #### WITHOUT CORRESPONDENCES\n",
" baselines_without_correspondences.to_file(baselines_without_correspondences_path)\n",
" parts_without_correspondences.to_file(parts_without_correspondences_path)\n",
"\n",
" merged_without_correspondences_path = os.path.join(outputsplitted_folderpath,f'b_{id}_WITHOUT_correspondence.geojson')\n",
" merge_geojsons([baselines_without_correspondences_path,parts_without_correspondences_path],merged_without_correspondences_path)\n",
"\n",
"\n"
]
}
],
"metadata": {
"accelerator": "TPU",
"colab": {
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}