Created
February 27, 2024 06:25
-
-
Save ChocopieKewpie/1afc5c9ac7b1d5b42b1b58b3def6bce4 to your computer and use it in GitHub Desktop.
dggs_join (for benchmarking)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

import geopandas as gpd
import h3pandas  # noqa: F401 -- imported only so the `.h3` accessor used below is available

folder_path = 'E:/DGGS_Benchmark/vectors'

# Sorted list of all GPKG files in the folder
v_files = sorted(file for file in os.listdir(folder_path) if file.endswith('.gpkg'))

# Total number of files to combine (the base layer counts as one of them)
num_files_to_open = 10
h3_res = 14  # H3 resolution used for polyfilling


def _layer_name(file_name):
    """Return the column name encoded in a file name.

    The name is the text between the first underscore and the following dot,
    e.g. 'vector_001.gpkg' -> '001'.
    """
    return file_name.split('_')[1].split('.')[0]


def _polyfill_layer(file_path, col_name, resolution):
    """Read one vector layer and return it indexed by H3 cell id.

    The layer is reprojected to EPSG:4326, polyfilled at `resolution` with one
    row per cell, stripped of its geometry column, and its first remaining
    column is renamed to `col_name`.
    """
    layer = gpd.read_file(file_path).to_crs(4326)
    layer = layer.h3.polyfill(resolution, explode=True).drop(columns=['geometry'])
    # Rename only the first column so every layer contributes a distinct name
    layer.columns = [col_name, *layer.columns[1:]]
    return layer.set_index('h3_polyfill')


if not v_files:
    raise FileNotFoundError(f"No .gpkg files found in {folder_path!r}")

# The first file (in sorted order) is the base table that the others join onto.
# Its path and column name are derived from the listing instead of being
# hard-coded, so it can never be skipped or loaded twice.
combined_df = _polyfill_layer(
    os.path.join(folder_path, v_files[0]),
    _layer_name(v_files[0]),
    h3_res,
)

# Loop through the remaining GPKG files and concatenate the data horizontally
for file in v_files[1:num_files_to_open]:
    file_path = os.path.join(folder_path, file)
    col_name = _layer_name(file)  # Extracting the column name from the file name
    df = _polyfill_layer(file_path, col_name, h3_res)
    # DataFrame.join defaults to a left join on the index (H3 cell id), so
    # cells that are absent from the base layer are not added.
    combined_df = combined_df.join(df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment