buildings_import_osm_fortaleza.ipynb
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/kauevestena/85f8e22eb5510cc7abaf521163f9661b/buildings_import_osm_fortaleza.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "I_wbXKuOExJ-",
"outputId": "6c94e200-81b2-412e-f762-aefba9c177ae"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
]
}
],
"source": [
"# connecting to Google Drive first\n",
"from google.colab import drive\n",
"drive.mount('/content/gdrive')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "__OOizznGZB-"
},
"outputs": [],
"source": [
"##########\n",
"########## SETUP\n",
"##########\n",
"\n",
"buildings_folderpath = '/content/gdrive/MyDrive/OSM_building_imports/building_parts_data'\n",
"parcels_folderpath = '/content/gdrive/MyDrive/OSM_building_imports/parcels_data'\n",
"\n",
"basepath = '/content/gdrive/MyDrive/OSM_building_imports/'\n",
"\n",
"proj_CRS = 'EPSG:31984' # projected CRS, in metres\n",
"\n",
"latlon_CRS = 'EPSG:4326' # lat/lon CRS, for the OSM outputs\n",
"\n",
"cols_to_delete = ['fid','Bairro'] # columns that shall not be imported\n",
"\n",
"TOLERANCE = 0.12 # simplification tolerance: 12 centimetres"
]
},
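{
"cell_type": "markdown",
"metadata": {},
"source": [
"A note on the constants above: `EPSG:31984` (SIRGAS 2000 / UTM zone 24S) is a projected CRS in metres that covers Fortaleza, so areas, buffers and the 0.12 m simplification tolerance are all computed in metric units, while `EPSG:4326` (plain latitude/longitude) is the CRS used for the GeoJSON outputs aimed at OSM. All geometric operations happen in the projected CRS, and only the final outputs are converted back to lat/lon."
]
},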
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TaR1A9XWK8my"
},
"outputs": [],
"source": [
"# renaming rules: mapping the truncated column names back to the full OSM-style keys\n",
"renamings = {'building':'building:part', 'pmfsefin:i':'pmfsefin:idedif', 'building:l':'building:levels'}"
]
},
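{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch (not part of the original pipeline) of what the renaming does: the source files carry truncated column names, and `rename(columns=renamings)` maps them back to the full OSM-style keys. Columns absent from the mapping pass through unchanged."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# toy example: only the columns present in the mapping get renamed\n",
"import pandas as pd\n",
"\n",
"toy = pd.DataFrame({'building':['yes'],'building:l':[2],'pmfsefin:i':[12345],'height':[6.0]})\n",
"print(toy.rename(columns=renamings).columns.tolist())\n",
"# expected: ['building:part', 'building:levels', 'pmfsefin:idedif', 'height']"
]
},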
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BUbF2SDjwn75"
},
"outputs": [],
"source": [
"# package installation\n",
"!pip install geopandas > install_log.txt # redirecting to a file to avoid the big verbose output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "kLRUUzEfTAsP"
},
"outputs": [],
"source": [
"!pip install pygeos > install_log2.txt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "9Mx-dFTBqbJA"
},
"outputs": [],
"source": [
"# imports\n",
"import os, json\n",
"import geopandas as gpd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "iqGCxJHgXKPD"
},
"outputs": [],
"source": [
"# recording the installed package versions, for reproducibility\n",
"!pip freeze > installed_packages.txt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YvTZJ5tHyEgX"
},
"outputs": [],
"source": [
"# setting up the output folder:\n",
"outputfolder_path = os.path.join(basepath,'output')\n",
"\n",
"if not os.path.exists(outputfolder_path):\n",
"    os.makedirs(outputfolder_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "m0v8Z7a6JoVd"
},
"outputs": [],
"source": [
"# getting the file list of each folder:\n",
"building_filelist = os.listdir(buildings_folderpath)\n",
"parcels_filelist = os.listdir(parcels_folderpath)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "MtEAWBk_qpIL"
},
"outputs": [],
"source": [
"if len(building_filelist) != len(parcels_filelist):\n",
"    raise Exception('The building and parcel file lists must have the same length!')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "UkGKF6NdrKmO",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "dad15716-8284-427f-f768-0c5a747ed83f"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"00044 {'buildings_path': '/content/gdrive/MyDrive/OSM_building_imports/building_parts_data/buildings_00044.gpkg', 'parcels_path': '/content/gdrive/MyDrive/OSM_building_imports/parcels_data/parcels_00044.gpkg'}\n"
]
}
],
"source": [ | |
"# creating the dictionary wich holds the between parcels and buildings:\n", | |
"file_records = {}\n", | |
"\n", | |
"for entry in building_filelist:\n", | |
" id = entry.split('.')[0].split('_')[-1]\n", | |
" file_records[id] = {}\n", | |
"\n", | |
" file_records[id]['buildings_path'] = os.path.join(buildings_folderpath,entry)\n", | |
"\n", | |
"for i,entry in enumerate(parcels_filelist):\n", | |
" id = entry.split('.')[0].split('_')[-1]\n", | |
" file_records[id]['parcels_path'] = os.path.join(parcels_folderpath,entry)\n", | |
"\n", | |
"\n", | |
"for key in file_records:\n", | |
" print(key,file_records[key])" | |
] | |
}, | |
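{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick illustration (not part of the original pipeline) of how the pairing key is derived: the id is whatever follows the last underscore of the file name, minus the extension, so `buildings_00044.gpkg` and `parcels_00044.gpkg` land under the same key."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# hypothetical file names, just to show the key extraction\n",
"for name in ['buildings_00044.gpkg','parcels_00044.gpkg']:\n",
"    print(name, '->', name.split('.')[0].split('_')[-1])\n",
"# both print '00044', which is why the two file lists pair up"
]
},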
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oCWjLmWbN_Gm"
},
"outputs": [],
"source": [
"def single_feature_to_gdf(input_geometry,crs=proj_CRS):\n",
"    '''wraps a single shapely geometry into a one-row GeoDataFrame'''\n",
"    return gpd.GeoDataFrame({'id':[1],'geometry':[input_geometry]},crs=crs)\n",
"\n",
"def remove_not_really_inside(input_gdf,inputpolygon):\n",
"    '''keeps only the features whose intersection with the polygon\n",
"    covers more than half of their original area'''\n",
"\n",
"    single_polygon_gdf = single_feature_to_gdf(inputpolygon)\n",
"\n",
"    in_gdf = input_gdf.copy() # the copy avoids a SettingWithCopyWarning\n",
"\n",
"    in_gdf.loc[:,'orig_area'] = in_gdf.loc[:,'geometry'].area\n",
"\n",
"    clipped = in_gdf.overlay(single_polygon_gdf,how='intersection',keep_geom_type=True)\n",
"\n",
"    clipped.loc[:,'intersection_area'] = clipped.geometry.area\n",
"    clipped.loc[:,'areas_ratio'] = clipped['intersection_area'] / clipped['orig_area']\n",
"\n",
"    # selecting the features that are mostly inside the polygon:\n",
"    selected_ids = clipped[clipped['areas_ratio'] > 0.5].set_index('pmfsefin:idedif').index\n",
"\n",
"    result = in_gdf.set_index('pmfsefin:idedif').loc[selected_ids,:].reset_index().drop(columns=['orig_area'])\n",
"\n",
"    if not result.empty:\n",
"        return result,max(result['height']),max(result['building:levels'])\n",
"    else:\n",
"        return result,None,None\n",
"\n",
"def dump_json(inputdict,outputpath):\n",
"    with open(outputpath,'w') as json_handle:\n",
"        json.dump(inputdict,json_handle,indent=4,ensure_ascii=False,sort_keys=True)\n",
"\n",
"def read_json(inputpath):\n",
"    with open(inputpath) as reader:\n",
"        return json.loads(reader.read())\n",
"\n",
"def merge_geojsons(input_pathlist,outputpath,delete_inputs=True):\n",
"    '''\n",
"    simple function to merge GeoJSON files without using any geospatial library.\n",
"    The same CRS is assumed for all files.\n",
"    '''\n",
"\n",
"    ref_dict = None\n",
"\n",
"    for i,path in enumerate(input_pathlist):\n",
"        if i == 0:\n",
"            ref_dict = read_json(path)\n",
"        else:\n",
"            ref_dict['features'] += read_json(path)['features']\n",
"\n",
"    dump_json(ref_dict,outputpath)\n",
"\n",
"    if delete_inputs:\n",
"        for inputpath in input_pathlist:\n",
"            os.remove(inputpath)\n"
]
},
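{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal synthetic check (not part of the original pipeline) of `remove_not_really_inside`, assuming the helpers and constants defined above: the 0.5 area-ratio threshold keeps a part whose majority lies inside the parcel and drops one that merely pokes into it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# synthetic check of the >0.5 area-ratio rule (toy data, not the real inputs)\n",
"from shapely.geometry import box\n",
"\n",
"parcel = box(0, 0, 10, 10)\n",
"toy_parts = gpd.GeoDataFrame({\n",
"    'pmfsefin:idedif': [1, 2],\n",
"    'height': [3.0, 6.0],\n",
"    'building:levels': [1, 2],\n",
"    'geometry': [box(1, 1, 4, 4),      # fully inside -> kept\n",
"                 box(8, 8, 14, 14)]},  # only ~11% of it inside -> dropped\n",
"    crs=proj_CRS)\n",
"\n",
"kept, max_h, max_lvls = remove_not_really_inside(toy_parts, parcel)\n",
"print(list(kept['pmfsefin:idedif']), max_h, max_lvls)  # expected: [1] 3.0 1"
]
},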
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XZj6MWJ2vPYl",
"outputId": "aea377aa-ca88-44de-e315-61ff8a05cbf9"
},
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"ID: 00044\n", | |
"buildings: /content/gdrive/MyDrive/OSM_building_imports/building_parts_data/buildings_00044.gpkg\n", | |
"parcels: /content/gdrive/MyDrive/OSM_building_imports/parcels_data/parcels_00044.gpkg\n", | |
"Index(['building:part', 'pmfsefin:idedif', 'building:levels', 'height',\n", | |
" 'Bairro', 'geometry'],\n", | |
" dtype='object')\n", | |
"Bairro\n", | |
"Index(['building:part', 'pmfsefin:idedif', 'building:levels', 'height',\n", | |
" 'geometry'],\n", | |
" dtype='object')\n", | |
" parcel 0 file id: 00044\n", | |
" parcel 10 file id: 00044\n", | |
" parcel 20 file id: 00044\n", | |
" parcel 30 file id: 00044\n", | |
" parcel 40 file id: 00044\n", | |
" parcel 50 file id: 00044\n", | |
" parcel 60 file id: 00044\n", | |
" parcel 70 file id: 00044\n", | |
" parcel 80 file id: 00044\n", | |
" parcel 90 file id: 00044\n", | |
" parcel 100 file id: 00044\n", | |
" parcel 110 file id: 00044\n", | |
" parcel 120 file id: 00044\n", | |
" parcel 130 file id: 00044\n", | |
" parcel 140 file id: 00044\n", | |
" parcel 150 file id: 00044\n", | |
" parcel 160 file id: 00044\n", | |
" parcel 170 file id: 00044\n", | |
" parcel 180 file id: 00044\n", | |
" parcel 190 file id: 00044\n", | |
" parcel 200 file id: 00044\n", | |
" parcel 210 file id: 00044\n", | |
" parcel 220 file id: 00044\n", | |
" parcel 230 file id: 00044\n", | |
" parcel 240 file id: 00044\n", | |
" parcel 250 file id: 00044\n", | |
" parcel 260 file id: 00044\n", | |
" parcel 270 file id: 00044\n", | |
" parcel 280 file id: 00044\n", | |
" parcel 290 file id: 00044\n", | |
" parcel 300 file id: 00044\n", | |
" parcel 310 file id: 00044\n", | |
" parcel 320 file id: 00044\n", | |
" parcel 330 file id: 00044\n", | |
" parcel 340 file id: 00044\n", | |
" parcel 350 file id: 00044\n", | |
" parcel 360 file id: 00044\n", | |
" parcel 370 file id: 00044\n", | |
" parcel 380 file id: 00044\n", | |
" parcel 390 file id: 00044\n", | |
" parcel 400 file id: 00044\n", | |
" parcel 410 file id: 00044\n", | |
" parcel 420 file id: 00044\n", | |
" parcel 430 file id: 00044\n", | |
" parcel 440 file id: 00044\n", | |
" parcel 450 file id: 00044\n", | |
" parcel 460 file id: 00044\n", | |
" parcel 470 file id: 00044\n", | |
" parcel 480 file id: 00044\n", | |
" parcel 490 file id: 00044\n", | |
" parcel 500 file id: 00044\n", | |
" parcel 510 file id: 00044\n", | |
" parcel 520 file id: 00044\n", | |
" parcel 530 file id: 00044\n", | |
" parcel 540 file id: 00044\n", | |
" parcel 550 file id: 00044\n", | |
" parcel 560 file id: 00044\n", | |
" parcel 570 file id: 00044\n", | |
" parcel 580 file id: 00044\n", | |
" parcel 590 file id: 00044\n", | |
" parcel 600 file id: 00044\n", | |
" parcel 610 file id: 00044\n", | |
" parcel 620 file id: 00044\n", | |
" parcel 630 file id: 00044\n", | |
" parcel 640 file id: 00044\n", | |
" parcel 650 file id: 00044\n", | |
" parcel 660 file id: 00044\n", | |
" parcel 670 file id: 00044\n", | |
" parcel 680 file id: 00044\n", | |
" parcel 690 file id: 00044\n", | |
" parcel 700 file id: 00044\n", | |
" parcel 710 file id: 00044\n", | |
" parcel 720 file id: 00044\n", | |
" parcel 730 file id: 00044\n", | |
" parcel 740 file id: 00044\n", | |
" parcel 750 file id: 00044\n", | |
" parcel 760 file id: 00044\n", | |
" parcel 770 file id: 00044\n", | |
" parcel 780 file id: 00044\n", | |
" parcel 790 file id: 00044\n", | |
" parcel 800 file id: 00044\n", | |
" parcel 810 file id: 00044\n", | |
" parcel 820 file id: 00044\n", | |
" parcel 830 file id: 00044\n", | |
" parcel 840 file id: 00044\n", | |
" parcel 850 file id: 00044\n", | |
" parcel 860 file id: 00044\n", | |
" parcel 870 file id: 00044\n", | |
" parcel 880 file id: 00044\n", | |
" parcel 890 file id: 00044\n", | |
" parcel 900 file id: 00044\n", | |
" parcel 910 file id: 00044\n", | |
" parcel 920 file id: 00044\n", | |
" parcel 930 file id: 00044\n", | |
" parcel 940 file id: 00044\n", | |
" parcel 950 file id: 00044\n", | |
" parcel 960 file id: 00044\n", | |
" parcel 970 file id: 00044\n", | |
" parcel 980 file id: 00044\n", | |
" parcel 990 file id: 00044\n", | |
" parcel 1000 file id: 00044\n", | |
" parcel 1010 file id: 00044\n", | |
" parcel 1020 file id: 00044\n", | |
" parcel 1030 file id: 00044\n", | |
" parcel 1040 file id: 00044\n", | |
" parcel 1050 file id: 00044\n", | |
" parcel 1060 file id: 00044\n", | |
" parcel 1070 file id: 00044\n", | |
" parcel 1080 file id: 00044\n", | |
" parcel 1090 file id: 00044\n", | |
" parcel 1100 file id: 00044\n", | |
" parcel 1110 file id: 00044\n", | |
" parcel 1120 file id: 00044\n", | |
" parcel 1130 file id: 00044\n", | |
" parcel 1140 file id: 00044\n", | |
" parcel 1150 file id: 00044\n", | |
" parcel 1160 file id: 00044\n", | |
" parcel 1170 file id: 00044\n", | |
" parcel 1180 file id: 00044\n", | |
" parcel 1190 file id: 00044\n", | |
" parcel 1200 file id: 00044\n", | |
" parcel 1210 file id: 00044\n", | |
" parcel 1220 file id: 00044\n", | |
" parcel 1230 file id: 00044\n", | |
" parcel 1240 file id: 00044\n", | |
" parcel 1250 file id: 00044\n", | |
" parcel 1260 file id: 00044\n", | |
" parcel 1270 file id: 00044\n", | |
" parcel 1280 file id: 00044\n", | |
" parcel 1290 file id: 00044\n", | |
" parcel 1300 file id: 00044\n", | |
" parcel 1310 file id: 00044\n", | |
" parcel 1320 file id: 00044\n", | |
" parcel 1330 file id: 00044\n", | |
" parcel 1340 file id: 00044\n", | |
" parcel 1350 file id: 00044\n", | |
" parcel 1360 file id: 00044\n", | |
" parcel 1370 file id: 00044\n", | |
" parcel 1380 file id: 00044\n", | |
" parcel 1390 file id: 00044\n", | |
" parcel 1400 file id: 00044\n", | |
" parcel 1410 file id: 00044\n", | |
" parcel 1420 file id: 00044\n", | |
" parcel 1430 file id: 00044\n", | |
" parcel 1440 file id: 00044\n", | |
" parcel 1450 file id: 00044\n", | |
" parcel 1460 file id: 00044\n", | |
" parcel 1470 file id: 00044\n", | |
" parcel 1480 file id: 00044\n", | |
" parcel 1490 file id: 00044\n", | |
" parcel 1500 file id: 00044\n", | |
" parcel 1510 file id: 00044\n", | |
" parcel 1520 file id: 00044\n", | |
" parcel 1530 file id: 00044\n", | |
" parcel 1540 file id: 00044\n", | |
" parcel 1550 file id: 00044\n", | |
" parcel 1560 file id: 00044\n", | |
" parcel 1570 file id: 00044\n", | |
" parcel 1580 file id: 00044\n", | |
" parcel 1590 file id: 00044\n", | |
" parcel 1600 file id: 00044\n", | |
" parcel 1610 file id: 00044\n", | |
" parcel 1620 file id: 00044\n", | |
" parcel 1630 file id: 00044\n", | |
" parcel 1640 file id: 00044\n", | |
" parcel 1650 file id: 00044\n", | |
" parcel 1660 file id: 00044\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"/usr/local/lib/python3.8/dist-packages/geopandas/geodataframe.py:1443: SettingWithCopyWarning: \n", | |
"A value is trying to be set on a copy of a slice from a DataFrame.\n", | |
"Try using .loc[row_indexer,col_indexer] = value instead\n", | |
"\n", | |
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", | |
" super().__setitem__(key, value)\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
" parcel 1670 file id: 00044\n", | |
" parcel 1680 file id: 00044\n", | |
" parcel 1690 file id: 00044\n", | |
" parcel 1700 file id: 00044\n", | |
" parcel 1710 file id: 00044\n", | |
" parcel 1720 file id: 00044\n", | |
" parcel 1730 file id: 00044\n", | |
" parcel 1740 file id: 00044\n", | |
" parcel 1750 file id: 00044\n", | |
" parcel 1760 file id: 00044\n", | |
" parcel 1770 file id: 00044\n", | |
" parcel 1780 file id: 00044\n", | |
" parcel 1790 file id: 00044\n", | |
" parcel 1800 file id: 00044\n", | |
" parcel 1810 file id: 00044\n", | |
" parcel 1820 file id: 00044\n", | |
" parcel 1830 file id: 00044\n", | |
" parcel 1840 file id: 00044\n", | |
" parcel 1850 file id: 00044\n" | |
] | |
} | |
], | |
"source": [ | |
"\n", | |
"# doing the actual work:\n", | |
"for id_key in file_records:\n", | |
" # storing already used building ids\n", | |
" inside_list = []\n", | |
" already_used = []\n", | |
"\n", | |
" united_polygons = {'building':[],'geometry':[],'building:levels':[],'height':[],'pmfsefin:idedif':[]}\n", | |
" print('ID: ',id_key)\n", | |
" print('buildings: ',file_records[id_key]['buildings_path'])\n", | |
" print('parcels: ',file_records[id_key]['parcels_path'])\n", | |
"\n", | |
"\n", | |
" # to remove the buildings with a single part afterwards\n", | |
" parts_to_remove = []\n", | |
"\n", | |
" buildings = gpd.read_file(file_records[id_key]['buildings_path'],encoding='latin1').rename(columns=renamings)\n", | |
" print(buildings.columns)\n", | |
"\n", | |
" if cols_to_delete:\n", | |
" for columname in cols_to_delete:\n", | |
" if columname in buildings.columns:\n", | |
" print(columname)\n", | |
" buildings.drop(columns=columname,inplace=True)\n", | |
"\n", | |
" print(buildings.columns)\n", | |
" \n", | |
"\n", | |
" parcels = gpd.read_file(file_records[id_key]['parcels_path'],encoding='latin1')\n", | |
"\n", | |
"\n", | |
"\n", | |
" # picking for each parcel\n", | |
" for i,parcel_geom in enumerate(parcels.geometry):\n", | |
"\n", | |
" try:\n", | |
" if i % 10 == 0:\n", | |
" print(' parcel ',i,' file id: ',id_key)\n", | |
"\n", | |
"\n", | |
" inside_df = buildings.geometry.intersects(parcel_geom.buffer(0.00001))\n", | |
"\n", | |
" if inside_df.empty:\n", | |
" print('empty')\n", | |
"\n", | |
" # buildings that are inside the parcel:\n", | |
" # selected_buildings = buildings[inside_df]\n", | |
"\n", | |
" selected_buildings,max_height,max_b_levels = remove_not_really_inside(buildings[inside_df],parcel_geom)\n", | |
" # print(max_height,max_b_levels)\n", | |
"\n", | |
" # unary union if is a polygon then only one building is on the parcel, if it multipolygon, there's more\n", | |
" unary = selected_buildings.geometry.unary_union\n", | |
"\n", | |
" if unary:\n", | |
"\n", | |
" inside_list = list(selected_buildings['pmfsefin:idedif'])\n", | |
"\n", | |
" repeated = [id for id in inside_list if id in already_used]\n", | |
"\n", | |
" if repeated:\n", | |
" # print(repeated,inside_list)\n", | |
"\n", | |
" inside_list = [id for id in inside_list if id not in repeated]\n", | |
"\n", | |
" if not inside_list:\n", | |
" continue\n", | |
" else:\n", | |
" # removing the repeated ones:\n", | |
" selected_buildings = selected_buildings.set_index('pmfsefin:idedif').drop(repeated).reset_index()\n", | |
"\n", | |
" if selected_buildings.empty:\n", | |
" continue\n", | |
" else:\n", | |
" unary = selected_buildings.geometry.unary_union\n", | |
"\n", | |
" if not unary:\n", | |
" continue\n", | |
"\n", | |
" \n", | |
" # print(inside_list)\n", | |
"\n", | |
"\n", | |
" g_type = unary.geom_type\n", | |
"\n", | |
" if g_type == 'Polygon':\n", | |
" united_polygons['building'].append('yes')\n", | |
" united_polygons['geometry'].append(unary)\n", | |
"\n", | |
" united_polygons['building:levels'].append(max_b_levels)\n", | |
" united_polygons['height'].append(max_height)\n", | |
"\n", | |
"\n", | |
"\n", | |
" if selected_buildings.shape[0] == 1:\n", | |
" curr_id = list(selected_buildings['pmfsefin:idedif'])[0]\n", | |
"\n", | |
" parts_to_remove.append(curr_id)\n", | |
" united_polygons['pmfsefin:idedif'].append(curr_id)\n", | |
"\n", | |
"\n", | |
" else:\n", | |
" united_polygons['pmfsefin:idedif'].append('')\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
" if g_type == \"MultiPolygon\":\n", | |
" united_as_gdf = single_feature_to_gdf(unary).explode(index_parts=False)\n", | |
"\n", | |
" for polygon in united_as_gdf.geometry:\n", | |
" united_polygons['building'].append('yes')\n", | |
" united_polygons['geometry'].append(polygon)\n", | |
"\n", | |
" # search for the parts inside each baseline\n", | |
" remaining_buildings = selected_buildings[selected_buildings.intersects(polygon)]\n", | |
"\n", | |
" if remaining_buildings.shape[0] == 1:\n", | |
" curr_id = list(remaining_buildings['pmfsefin:idedif'])[0]\n", | |
"\n", | |
" united_polygons['pmfsefin:idedif'].append(curr_id)\n", | |
" parts_to_remove.append(curr_id)\n", | |
"\n", | |
" else:\n", | |
" united_polygons['pmfsefin:idedif'].append('')\n", | |
"\n", | |
"\n", | |
" # m1 = max(remaining_buildings['height'])\n", | |
" # m2 = max(remaining_buildings['building:levels'])\n", | |
" # print(m1,m2,max(selected_buildings['height']),max(selected_buildings['building:levels']))\n", | |
"\n", | |
" united_polygons['building:levels'].append(max(remaining_buildings['building:levels']))\n", | |
" united_polygons['height'].append(max(remaining_buildings['height']))\n", | |
"\n", | |
" already_used += inside_list\n", | |
" # removing repeated entries:\n", | |
" already_used = list(set(already_used))\n", | |
" inside_list = []\n", | |
" \n", | |
"\n", | |
" except:\n", | |
" with open(os.path.join(basepath,'skipped_parcels.txt'),'a+') as report_writer:\n", | |
" centroid_text = parcel_geom.representative_point().wkt\n", | |
" report_writer.write(centroid_text+'\\n')\n", | |
" print('skipped parcel ',i,' centroid: ',centroid_text)\n", | |
"\n", | |
"\n", | |
" # removing the ones that got only one part:\n", | |
" # print(buildings.shape)\n", | |
" # print(len(parts_to_remove))\n", | |
"\n", | |
" # removing buildings that have a single part\n", | |
" buildings = buildings.set_index('pmfsefin:idedif').drop(parts_to_remove).reset_index()\n", | |
"\n", | |
"\n", | |
"\n", | |
" # taking care of the buildings that are oustide the parcels:\n", | |
" not_inside_parcels = buildings.to_crs(proj_CRS).disjoint(parcels.geometry.unary_union)\n", | |
" buildings_outside_parcels = buildings[not_inside_parcels].rename(columns={'building:part':'building'}).to_crs(latlon_CRS)\n", | |
" outpath_outside_parcels = os.path.join(outputfolder_path,f'{id_key}_outside_parcels.geojson')\n", | |
"\n", | |
" if not buildings_outside_parcels.empty:\n", | |
" buildings_outside_parcels.to_file(outpath_outside_parcels)\n", | |
"\n", | |
"\n", | |
" # checking if there's overlapping parts, throwing an error \n", | |
" for outside_building in buildings_outside_parcels.geometry:\n", | |
" overlappers = buildings_outside_parcels.overlaps(outside_building)\n", | |
"\n", | |
" if not buildings_outside_parcels[overlappers].empty:\n", | |
" if not overlappers.empty: \n", | |
" if True in overlappers:\n", | |
" raise Exception('There are overlapping building parts!!')\n", | |
"\n", | |
" # exporting the parts of the buildings:\n", | |
" buildings = buildings[~not_inside_parcels]\n", | |
" buildings = buildings.to_crs(latlon_CRS)\n", | |
" buildings['pmfsefin:idedif'] = buildings['pmfsefin:idedif'].astype(str)\n", | |
" outpath_parts_geojson = os.path.join(outputfolder_path,f'{id_key}_building_parts.geojson')\n", | |
"\n", | |
" if not buildings.empty:\n", | |
" buildings.to_file(outpath_parts_geojson)\n", | |
"\n", | |
"\n", | |
" # exporting the baselines of the buildings:\n", | |
" united_as_gdf = gpd.GeoDataFrame(united_polygons,crs=proj_CRS).to_crs(latlon_CRS)\n", | |
"\n", | |
" outpath_united_geojson = os.path.join(outputfolder_path,f'{id_key}_baselines.geojson')\n", | |
"\n", | |
" if not united_as_gdf.empty:\n", | |
" united_as_gdf.to_file(outpath_united_geojson)\n", | |
"\n", | |
" # now exporting all to a single file\n", | |
" paths_to_merge = []\n", | |
"\n", | |
" for part_path in [outpath_united_geojson,outpath_parts_geojson,outpath_outside_parcels]:\n", | |
" if os.path.exists(part_path):\n", | |
" paths_to_merge.append(part_path)\n", | |
"\n", | |
"\n", | |
" merged_outpath = os.path.join(outputfolder_path,f'{id_key}_exported_buildings.geojson')\n", | |
"\n", | |
" merge_geojsons(paths_to_merge,merged_outpath)" | |
] | |
}, | |
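{
"cell_type": "markdown",
"metadata": {},
"source": [
"The grouping step above hinges on a simple property of the unary union, illustrated here on toy squares (not the real data): touching parts dissolve into a single `Polygon` (one building on the parcel), while disjoint parts come back as a `MultiPolygon` (several buildings)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# toy illustration of the unary_union test used in the loop above\n",
"from shapely.geometry import box\n",
"from shapely.ops import unary_union\n",
"\n",
"touching = unary_union([box(0, 0, 1, 1), box(1, 0, 2, 1)])\n",
"disjoint = unary_union([box(0, 0, 1, 1), box(5, 0, 6, 1)])\n",
"\n",
"print(touching.geom_type)  # Polygon: the parts belong to a single building\n",
"print(disjoint.geom_type)  # MultiPolygon: more than one building on the parcel"
]
},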
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GQL03ZR35Rj7",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "bd4f4e57-f593-45d1-aa0d-30639ddaccf8"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"00044\n",
"5393 6142 0\n"
]
}
],
"source": [ | |
"# first iterating through outtut folder:\n", | |
"for filename in os.listdir(outputfolder_path):\n", | |
"\n", | |
" id_key = filename.split('_')[0]\n", | |
" print(id_key)\n", | |
"\n", | |
" # as was originally:\n", | |
" b_before = gpd.read_file(file_records[id_key]['buildings_path'],encoding='latin1').rename(columns=renamings)\n", | |
" b_before['pmfsefin:idedif'] = b_before['pmfsefin:idedif'].astype(str)\n", | |
"\n", | |
"\n", | |
"\n", | |
" # as it has been written:\n", | |
" filepath = os.path.join(outputfolder_path,filename)\n", | |
" b_after = gpd.read_file(filepath)\n", | |
"\n", | |
" # checking if there are missing 'pmfsefin:idedif'\n", | |
" l_before = list(b_before['pmfsefin:idedif'])\n", | |
"\n", | |
" l_after = list(b_after['pmfsefin:idedif'])\n", | |
"\n", | |
" l_missing = []\n", | |
"\n", | |
" for id in l_before:\n", | |
" if id != '':\n", | |
" if not id in l_after:\n", | |
" l_missing.append(id)\n", | |
"\n", | |
"\n", | |
" print(len(l_before),len(l_after),len(l_missing))\n", | |
"\n" | |
] | |
}, | |
{
"cell_type": "markdown",
"metadata": {
"id": "YFACQHGpAQKo"
},
"source": [
"<b style=\"color:blue;\">PART 2: <br>\n",
"-------------- SPLITTING UP: <br>\n",
"\n",
"  - ALREADY EXISTING (each will need a careful look) <br>\n",
"\n",
"  - NON-EXISTING (can be straightforwardly imported)</b>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2cxiXkJbAOP3"
},
"outputs": [],
"source": [
"# PART 2: setting up the output folder of the second part:\n",
"outputsplitted_folderpath = os.path.join(basepath,'output_splitted')\n",
"\n",
"if not os.path.exists(outputsplitted_folderpath):\n",
"    os.makedirs(outputsplitted_folderpath)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xISg9U7uNSae"
},
"outputs": [],
"source": [
"!pip install osmnx > install_log3.txt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "sgK5Lki6QNTi"
},
"outputs": [],
"source": [
"import osmnx as ox\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WN6FRXv_EFTP"
},
"outputs": [],
"source": [
"## the TOLERANCE for simplifying is set in the SETUP cell above"
]
},
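{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before the batch below runs, a toy check (using the `TOLERANCE` defined in the SETUP cell) of what the simplification does: vertices that deviate from the surrounding edge by less than the tolerance, here 0.12 m in the projected CRS, are dropped, so the footprints lose noise vertices while keeping their shape."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# toy check of the simplification tolerance (synthetic geometry, not the real data)\n",
"from shapely.geometry import Polygon\n",
"\n",
"# a square with a redundant mid-edge vertex displaced by 5 cm (< TOLERANCE)\n",
"noisy = Polygon([(0, 0), (5, 0.05), (10, 0), (10, 10), (0, 10)])\n",
"clean = noisy.simplify(TOLERANCE)\n",
"\n",
"print(len(noisy.exterior.coords), '->', len(clean.exterior.coords))  # 6 -> 5\n",
"print(abs(noisy.area - clean.area))  # the area change stays well below 1 m^2"
]
},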
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "LF_6_VLWNpe_"
},
"outputs": [],
"source": [
"# iterating through the output folder:\n",
"for filename in os.listdir(outputfolder_path):\n",
"    file_id = filename.split('_')[0]\n",
"\n",
"    filepath = os.path.join(outputfolder_path,filename)\n",
"\n",
"    # reading as a GeoDataFrame to get its bounding box:\n",
"    allbuildings = gpd.read_file(filepath)\n",
"\n",
"    # doing the simplification:\n",
"    allbuildings = allbuildings.to_crs(proj_CRS) # projecting\n",
"\n",
"    allbuildings['geometry'] = allbuildings.geometry.simplify(TOLERANCE)\n",
"\n",
"    allbuildings = allbuildings.to_crs(latlon_CRS) # unprojecting\n",
"    # end of the simplification part\n",
"\n",
"    allbuildings['pmfsefin:idedif'] = allbuildings['pmfsefin:idedif'].replace('',np.nan)\n",
"\n",
"    w,s,e,n = allbuildings.unary_union.bounds\n",
"\n",
"    # THE HEAVY LIFTING: the download of the OSM data\n",
"    already_existent = ox.geometries.geometries_from_bbox(n, s, e, w, {'building':True}).reset_index()\n",
"    # dissolving into a single multipolygon to speed things up:\n",
"    already_existent_unary_pol = already_existent.geometry.unary_union\n",
"\n",
"    # splitting buildings (baselines) from parts\n",
"    building_baselines = allbuildings[allbuildings['building'] == 'yes'].drop(columns=['building:part'])\n",
"\n",
"    building_parts = allbuildings[allbuildings['building:part'] == 'yes'].drop(columns=['building'])\n",
"    building_parts['pmfsefin:idedif'] = building_parts['pmfsefin:idedif'].astype(int) # to avoid type issues during serialization\n",
"\n",
"    baselines_with_correspondences_indexes = building_baselines.intersects(already_existent_unary_pol)\n",
"\n",
"    # splitting up for ease of analysis\n",
"    baselines_with_correspondences = building_baselines[baselines_with_correspondences_indexes]\n",
"    baselines_without_correspondences = building_baselines[~baselines_with_correspondences_indexes]\n",
"\n",
"    # doing the analogous split of the parts, between the ones with and without correspondences\n",
"    baselines_with_correspondences_unary = baselines_with_correspondences.geometry.unary_union\n",
"    parts_inside_with_correspondence_indexes = building_parts.intersects(baselines_with_correspondences_unary)\n",
"\n",
"    parts_with_correspondences = building_parts[parts_inside_with_correspondence_indexes]\n",
"    parts_without_correspondences = building_parts[~parts_inside_with_correspondence_indexes]\n",
"\n",
"    # output paths:\n",
"    baselines_with_correspondences_path = os.path.join(outputsplitted_folderpath,'b_wc.geojson')\n",
"    baselines_without_correspondences_path = os.path.join(outputsplitted_folderpath,'b_wo_c.geojson')\n",
"    parts_with_correspondences_path = os.path.join(outputsplitted_folderpath,'p_w_c.geojson')\n",
"    parts_without_correspondences_path = os.path.join(outputsplitted_folderpath,'p_wo_c.geojson')\n",
"\n",
"    # writing, then merging:\n",
"\n",
"    #### WITH CORRESPONDENCES\n",
"    baselines_with_correspondences.to_file(baselines_with_correspondences_path)\n",
"    parts_with_correspondences.to_file(parts_with_correspondences_path)\n",
"\n",
"    merged_with_correspondences_path = os.path.join(outputsplitted_folderpath,f'b_{file_id}_with_correspondence.geojson')\n",
"    merge_geojsons([baselines_with_correspondences_path,parts_with_correspondences_path],merged_with_correspondences_path)\n",
"\n",
"    #### WITHOUT CORRESPONDENCES\n",
"    baselines_without_correspondences.to_file(baselines_without_correspondences_path)\n",
"    parts_without_correspondences.to_file(parts_without_correspondences_path)\n",
"\n",
"    merged_without_correspondences_path = os.path.join(outputsplitted_folderpath,f'b_{file_id}_WITHOUT_correspondence.geojson')\n",
"    merge_geojsons([baselines_without_correspondences_path,parts_without_correspondences_path],merged_without_correspondences_path)\n"
]
}
],
"metadata": {
"accelerator": "TPU",
"colab": {
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}