Skip to content

Instantly share code, notes, and snippets.

@ChocopieKewpie
Created February 27, 2024 06:25
Show Gist options
  • Save ChocopieKewpie/1afc5c9ac7b1d5b42b1b58b3def6bce4 to your computer and use it in GitHub Desktop.
Save ChocopieKewpie/1afc5c9ac7b1d5b42b1b58b3def6bce4 to your computer and use it in GitHub Desktop.
dggs_join (for benchmarking)
import os
import geopandas as gpd
import h3pandas
folder_path = 'E:/DGGS_Benchmark/vectors'
# Sorted list of all GPKG files in the folder, so the join order is stable.
v_files = sorted(file for file in os.listdir(folder_path) if file.endswith('.gpkg'))
# Total number of files to open and join (the first file included).
num_files_to_open = 10
h3_res = 14  # H3 resolution used for the polyfill


def _load_h3_indexed(file_path, col_name, resolution):
    """Read one vector file and return its attributes indexed by H3 cell.

    The layer is reprojected to EPSG:4326, polyfilled at ``resolution`` with
    one row per H3 cell (``explode=True``), the geometry column is dropped,
    the first remaining column is renamed to ``col_name`` and the
    ``h3_polyfill`` column becomes the index.
    """
    frame = gpd.read_file(file_path).to_crs(4326)
    frame = frame.h3.polyfill(resolution, explode=True).drop(columns=['geometry'])
    # Only the first column is renamed; any further columns keep their names.
    # NOTE(review): if the layers carry more than one attribute column with
    # identical names, the join below will raise on overlapping columns.
    frame.columns = [col_name, *frame.columns[1:]]
    return frame.set_index('h3_polyfill')


def _column_name(file_name):
    """Extract the column label from a file name such as 'vector_001.gpkg'."""
    return file_name.split('_')[1].split('.')[0]


if not v_files:
    raise FileNotFoundError(f"No .gpkg files found in {folder_path!r}")

# Seed the combined frame with the first file in sorted order. Taking it from
# v_files (rather than a second hard-coded path) keeps it in sync with
# folder_path and with the v_files[1:] slice joined below.
combined_df = _load_h3_indexed(
    os.path.join(folder_path, v_files[0]), _column_name(v_files[0]), h3_res
)

# Join each remaining GPKG file onto the combined frame, one column per file.
for file in v_files[1:num_files_to_open]:
    file_path = os.path.join(folder_path, file)
    col_name = _column_name(file)  # column is named after the file's number
    df = _load_h3_indexed(file_path, col_name, h3_res)
    combined_df = combined_df.join(df)  # left join on the index (H3 cell id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment