Last active
March 20, 2023 17:26
-
-
Save ttomasz/661d5ec910744cb778c222a949413b6d to your computer and use it in GitHub Desktop.
Convert multiline (regular) GeoJSON to line-delimited
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
import datetime
import time
from pathlib import Path

# Third-party dependencies: print an installation hint before re-raising so
# the user sees how to fix the problem above the traceback.
try:
    import ijson
except ModuleNotFoundError:
    print("Module ijson not found. Run: `pip install ijson`")
    raise

try:
    import jsonlines
except ModuleNotFoundError:
    print("Module jsonlines not found. Run: `pip install jsonlines`")
    raise
def curent_timestamp() -> str:
    """Return the current local time as an ISO 8601 string.

    A space (instead of the default ``T``) separates the date and time
    parts, which reads better in log lines.
    """
    return datetime.datetime.now().isoformat(sep=" ")
if __name__ == '__main__':
    # Script entry point: stream features out of a regular (multiline)
    # GeoJSON FeatureCollection and write them one-per-line (GeoJSONL),
    # dropping features that have no geometry.
    #
    # Usage: python convert.py input/path.geojson output/path.geojsonl
    if len(sys.argv) != 3:
        print("Number of parameters unexpected. You should run the script like: "
              "python convert.py input/path.geojson output/path.geojsonl")
        # sys.exit instead of the bare `exit` builtin, which is injected by
        # the `site` module and is not guaranteed to exist (e.g. `python -S`).
        sys.exit(1)

    input_path_object = Path(sys.argv[1]).absolute().resolve()
    input_path = input_path_object.as_posix()
    if not input_path_object.exists():
        raise FileNotFoundError(f"{sys.argv[1]} (resolved to: {input_path}) - not found.")

    output_path_object = Path(sys.argv[2]).absolute()
    output_path = output_path_object.as_posix()
    # Opening the output for writing truncates it immediately, so writing
    # onto the input file would destroy the data before it is read.
    if output_path_object.resolve() == input_path_object:
        print("Output path must be different from input path.")
        sys.exit(1)

    print(f"{input_path=}")  # = suffix shows variable name
    print(f"{output_path=}")

    num_rows = 0  # features read from the input
    num_rows_written = 0  # features actually written to the output
    num_rows_missing_geometry = 0  # features skipped (null/missing geometry)

    print(f"{ijson.backend=}")
    # ijson expects a binary file object and does the decoding itself;
    # text-mode files are deprecated there and slower.
    with open(input_path, "rb") as f, jsonlines.open(output_path, mode="w") as writer:
        print(f"{curent_timestamp()} - starting processing...")
        time_start = time.perf_counter()
        # "features.item" yields each element of the top-level "features"
        # array lazily, so the whole file is never held in memory.
        # use_float=True makes numbers plain floats instead of Decimal.
        for num_rows, feature in enumerate(
            ijson.items(f, "features.item", use_float=True), start=1
        ):
            if feature.get("geometry"):
                writer.write(feature)
                num_rows_written += 1
            else:
                num_rows_missing_geometry += 1
            if num_rows % 100000 == 0:
                print(f"{curent_timestamp()} - rows processed so far: {num_rows:_}")
    time_end = time.perf_counter()

    duration = datetime.timedelta(seconds=time_end - time_start)
    # Report the number of rows actually written, not the number processed.
    print(f"{curent_timestamp()} - processing finished. Total rows written to new file: {num_rows_written:_}. "
          f"Time: {duration}. Number of dropped rows due to lack of geometry: {num_rows_missing_geometry:_}.")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"type": "Feature", "properties": {"TRIP_ID": 1373017604620000351, "CALL_TYPE": "C", "ORIGIN_CALL": "", "ORIGIN_STAND": "", "TAXI_ID": 20000351, "TIMESTAMP": "java.util.GregorianCalendar[time=1373017604,areFieldsSet=true,areAllFieldsSet=true,lenient=true,zone=java.util.SimpleTimeZone[id=UTC,offset=0,dstSavings=3600000,useDaylight=false,startYear=0,startMode=0,startMonth=0,startDay=0,startDayOfWeek=0,startTime=0,startTimeMode=0,endMode=0,endMonth=0,endDay=0,endDayOfWeek=0,endTime=0,endTimeMode=0],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=1,YEAR=1970,MONTH=0,WEEK_OF_YEAR=3,WEEK_OF_MONTH=3,DAY_OF_MONTH=16,DAY_OF_YEAR=16,DAY_OF_WEEK=6,DAY_OF_WEEK_IN_MONTH=3,AM_PM=1,HOUR=9,HOUR_OF_DAY=21,MINUTE=23,SECOND=37,MILLISECOND=604,ZONE_OFFSET=0,DST_OFFSET=0]", "DAY_TYPE": "A", "MISSING_DATA": false}, "geometry": {"type": "MultiPoint", "coordinates": [[-8.669088, 41.235228], [-8.669142, 41.236434], [-8.668863, 41.238369], [-8.667819, 41.236128], [-8.666748, 41.232897], [-8.668368, 41.228892], [-8.671149, 41.226687], [-8.675262, 41.228415], [-8.679879, 41.230557], [-8.685954, 41.232042], [-8.692443, 41.232438], [-8.695629, 41.234238], [-8.695836, 41.238441], [-8.694333, 41.243013], [-8.695107, 41.247549], [-8.69733, 41.250672], [-8.700489, 41.254596], [-8.701713, 41.259483], [-8.701533, 41.264667], [-8.7021, 41.270004], [-8.700444, 41.275503], [-8.698941, 41.280867], [-8.701434, 41.285718], [-8.704773, 41.290461], [-8.706357, 41.295483], [-8.702523, 41.299884], [-8.697591, 41.303547], [-8.697393, 41.308434], [-8.700894, 41.313186], [-8.704386, 41.31792], [-8.7075, 41.322843], [-8.708661, 41.328252], [-8.711145, 41.333598], [-8.712432, 41.339115], [-8.712666, 41.34357], [-8.712027, 41.346648], [-8.712117, 41.350698], [-8.713062, 41.356035], [-8.717904, 41.360796], [-8.722737, 41.365404], [-8.724357, 41.370957], [-8.726283, 41.37552], [-8.728281, 41.379147], [-8.731143, 41.382252], [-8.734932, 41.385114], [-8.738847, 41.388111], [-8.742213, 41.391369], [-8.745471, 41.394717], 
[-8.748999, 41.398785], [-8.753706, 41.403366], [-8.759556, 41.407353], [-8.76033, 41.412627], [-8.756415, 41.417658], [-8.751537, 41.422401], [-8.746488, 41.427072], [-8.743644, 41.432373], [-8.739972, 41.437773], [-8.736813, 41.443344], [-8.733213, 41.448672], [-8.732241, 41.454324], [-8.735634, 41.459778], [-8.738577, 41.465142], [-8.738874, 41.470263], [-8.740845, 41.475843], [-8.743635, 41.481432], [-8.744616, 41.487075], [-8.746083, 41.492439], [-8.749395, 41.49738], [-8.752446, 41.502537], [-8.753364, 41.508477], [-8.754066, 41.51412], [-8.752401, 41.519637], [-8.750619, 41.525217], [-8.751852, 41.530581], [-8.753373, 41.534919], [-8.754858, 41.539158], [-8.757, 41.543856], [-8.757747, 41.548914], [-8.757459, 41.553432], [-8.757135, 41.557446], [-8.759295, 41.561028], [-8.763336, 41.56452], [-8.76681, 41.568867], [-8.765334, 41.573727], [-8.76321, 41.578416], [-8.764074, 41.583366], [-8.766657, 41.588208], [-8.769645, 41.593113], [-8.772669, 41.597856], [-8.775693, 41.60277], [-8.775288, 41.608269], [-8.772255, 41.612067], [-8.768619, 41.616387], [-8.763831, 41.620086], [-8.761446, 41.624586], [-8.760528, 41.629491], [-8.761491, 41.634126], [-8.763714, 41.63877], [-8.766819, 41.64309], [-8.769609, 41.647545], [-8.771346, 41.652279], [-8.773632, 41.656896], [-8.776854, 41.660928], [-8.777979, 41.665275], [-8.774937, 41.669307], [-8.771211, 41.673006], [-8.771562, 41.677461], [-8.774352, 41.682051], [-8.777448, 41.686227], [-8.781552, 41.689764], [-8.785359, 41.693004], [-8.789166, 41.696244], [-8.792946, 41.699475], [-8.796222, 41.702301], [-8.797311, 41.704848], [-8.798706, 41.706027], [-8.801073, 41.704776], [-8.801379, 41.706711], [-8.802909, 41.706882], [-8.803449, 41.707449]]}} | |
{"type": "Feature", "properties": {"TRIP_ID": 1375450445620000195, "CALL_TYPE": "C", "ORIGIN_CALL": "", "ORIGIN_STAND": "", "TAXI_ID": 20000195, "TIMESTAMP": "java.util.GregorianCalendar[time=1375450445,areFieldsSet=true,areAllFieldsSet=true,lenient=true,zone=java.util.SimpleTimeZone[id=UTC,offset=0,dstSavings=3600000,useDaylight=false,startYear=0,startMode=0,startMonth=0,startDay=0,startDayOfWeek=0,startTime=0,startTimeMode=0,endMode=0,endMonth=0,endDay=0,endDayOfWeek=0,endTime=0,endTimeMode=0],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=1,YEAR=1970,MONTH=0,WEEK_OF_YEAR=3,WEEK_OF_MONTH=3,DAY_OF_MONTH=16,DAY_OF_YEAR=16,DAY_OF_WEEK=6,DAY_OF_WEEK_IN_MONTH=3,AM_PM=1,HOUR=10,HOUR_OF_DAY=22,MINUTE=4,SECOND=10,MILLISECOND=445,ZONE_OFFSET=0,DST_OFFSET=0]", "DAY_TYPE": "A", "MISSING_DATA": false}, "geometry": {"type": "MultiPoint", "coordinates": [[-8.628732, 41.169834], [-8.628732, 41.169843], [-8.628732, 41.169834], [-8.628381, 41.169771], [-8.626923, 41.169924], [-8.62569, 41.169411], [-8.624565, 41.169312], [-8.623782, 41.169213], [-8.623791, 41.169069], [-8.623125, 41.169141], [-8.621946, 41.169663], [-8.619561, 41.171166], [-8.616519, 41.171778], [-8.614818, 41.171823], [-8.613504, 41.17185], [-8.613207, 41.171895], [-8.612856, 41.17158], [-8.612874, 41.171238], [-8.611524, 41.17104], [-8.608725, 41.169492], [-8.607375, 41.169258], [-8.606799, 41.168475], [-8.606628, 41.168025], [-8.606628, 41.166576], [-8.607582, 41.166351], [-8.607573, 41.165712]]}} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment