Skip to content

Instantly share code, notes, and snippets.

@answerquest
Created June 9, 2022 05:59
Show Gist options
  • Select an option

  • Save answerquest/90f5841839decec3fd0f6bcba44ef5df to your computer and use it in GitHub Desktop.

Select an option

Save answerquest/90f5841839decec3fd0f6bcba44ef5df to your computer and use it in GitHub Desktop.
Convert Village Boundary_split.7z into a single .geojsonl file, copying the district and taluka names into each shape's properties.
# mhvillages-make-geojsonl
# by Nikhil VJ, https://nikhilvj.co.in on 2022-06-09
# MIT Open License Boilerplate:
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
# of the Software, and to permit persons to whom the Software is furnished to do
# so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import py7zr
import geopandas as gpd
import io
import pandas as pd
# Merge every per-taluka .geojson stored inside the 7z archive into a single
# newline-delimited GeoJSON (.geojsonl) file, tagging each feature with the
# district and taluka names taken from the member's path in the archive.
zfile = 'Village Boundary_split.7z'
# Archive members look like: data/village_bounds/split/Ahmednagar/Jamkhed.geojson
split_prefix = 'data/village_bounds/split/'

# Open the archive once: list its members, then pull every wanted member in a
# single read() call rather than re-opening the archive for each file.
with py7zr.SevenZipFile(zfile, 'r') as archive:
    list1 = archive.getnames()
    shapefiles = [x for x in list1 if x.endswith('.geojson')]
    if not shapefiles:
        # pd.concat() on an empty list would fail with a much less helpful error.
        raise SystemExit(f"No .geojson members found in {zfile!r}")
    # read() is given a list of member names and returns a dict like:
    # {'data/village_bounds/split/Ahmednagar/Jamkhed.geojson': <_io.BytesIO>, ...}
    # NOTE(review): newer py7zr releases may have replaced read() with
    # extract(..., factory=...) - confirm against the installed version.
    blobs = archive.read(shapefiles)

gCollector = []
for file1 in shapefiles:
    print(file1)
    # Look the buffer up by member name; pop() drops it from the dict so the
    # raw bytes can be freed once the GeoDataFrame has been built.
    gdf1 = gpd.read_file(blobs.pop(file1))

    # 'data/village_bounds/split/Ahmednagar/Jamkhed.geojson' -> ['Ahmednagar', 'Jamkhed']
    namesHolder = [
        x.replace('.geojson', '')
        for x in file1.replace(split_prefix, '').split('/')
    ]
    # Handle 1 exception: 'data/village_bounds/split/.geojson' -> [''],
    # which carries no district/taluka information.
    if len(namesHolder) < 2:
        district, taluka = '', ''
    else:
        district, taluka = namesHolder[0], namesHolder[1]
    gdf1['DISTRICT'] = district
    gdf1['TALUKA'] = taluka
    gCollector.append(gdf1)

gdf2 = gpd.GeoDataFrame(pd.concat(gCollector, ignore_index=True))
# Name the driver explicitly so the output format does not depend on
# file-extension inference.
gdf2.to_file('mhvillages.geojsonl', driver='GeoJSONSeq', index=False)
# Alternative output format:
# gdf2.to_file('mhvillages.gpkg', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment