ChocopieKewpie · February 27, 2024 06:25
diff --git a/dggs_join.py b/dggs_join.py

 import os
 import geopandas as gpd
 import h3pandas


 folder_path = 'E:/DGGS_Benchmark/vectors'

 # All GeoPackage files in the folder, sorted by name so the processing order
 # (and therefore the seed file) is deterministic.
 v_files = sorted(name for name in os.listdir(folder_path) if name.endswith('.gpkg'))

 # Upper bound on how many files are combined; the seed file counts as one.
 num_files_to_open = 10
 h3_res = 14  # H3 resolution used for the polyfill


 def _column_name(file_name):
     """Return the text between the first '_' and the following '.'.

     Example: 'vector_001.gpkg' -> '001'. Raises IndexError when the file
     name contains no underscore.
     """
     return file_name.split('_')[1].split('.')[0]


 def _load_h3_column(path, col_name, resolution):
     """Read one vector file and return its attributes indexed by H3 cell.

     The layer is reprojected to EPSG:4326 and polyfilled at ``resolution``
     with ``explode=True`` (per the h3pandas docs this yields one row per
     H3 cell). The geometry column is dropped, the first remaining column
     is renamed to ``col_name`` and 'h3_polyfill' becomes the index.
     """
     frame = gpd.read_file(path).to_crs(4326)
     frame = frame.h3.polyfill(resolution, explode=True).drop(columns=['geometry'])
     # Positional rename: only the first column takes the per-file name.
     frame.columns = [col_name, *frame.columns[1:]]
     return frame.set_index('h3_polyfill')


 if not v_files:
     raise FileNotFoundError('No .gpkg files found in ' + folder_path)

 # Seed the combined frame from the first sorted file rather than a
 # hard-coded path, so the seed can never be loaded a second time by the
 # loop below.
 seed_file = v_files[0]
 combined_df = _load_h3_column(
     os.path.join(folder_path, seed_file), _column_name(seed_file), h3_res
 )

 # Add one column per remaining GPKG file, aligned on the H3 cell index.
 for file in v_files[1:num_files_to_open]:
     file_path = os.path.join(folder_path, file)
     col_name = _column_name(file)  # e.g. '002' for 'vector_002.gpkg'
     df = _load_h3_column(file_path, col_name, h3_res)
     # DataFrame.join defaults to a left join on the index, so only cells
     # present in the seed file are kept.
     combined_df = combined_df.join(df)

	import os
	import geopandas as gpd
	import h3pandas


	folder_path = 'E:/DGGS_Benchmark/vectors'

	# All GeoPackage files in the folder, sorted by name so the processing order
	# (and therefore the seed file) is deterministic.
	v_files = sorted(name for name in os.listdir(folder_path) if name.endswith('.gpkg'))

	# Upper bound on how many files are combined; the seed file counts as one.
	num_files_to_open = 10
	h3_res = 14  # H3 resolution used for the polyfill


	def _column_name(file_name):
	    """Return the text between the first '_' and the following '.'.

	    Example: 'vector_001.gpkg' -> '001'. Raises IndexError when the file
	    name contains no underscore.
	    """
	    return file_name.split('_')[1].split('.')[0]


	def _load_h3_column(path, col_name, resolution):
	    """Read one vector file and return its attributes indexed by H3 cell.

	    The layer is reprojected to EPSG:4326 and polyfilled at ``resolution``
	    with ``explode=True`` (per the h3pandas docs this yields one row per
	    H3 cell). The geometry column is dropped, the first remaining column
	    is renamed to ``col_name`` and 'h3_polyfill' becomes the index.
	    """
	    frame = gpd.read_file(path).to_crs(4326)
	    frame = frame.h3.polyfill(resolution, explode=True).drop(columns=['geometry'])
	    # Positional rename: only the first column takes the per-file name.
	    frame.columns = [col_name, *frame.columns[1:]]
	    return frame.set_index('h3_polyfill')


	if not v_files:
	    raise FileNotFoundError('No .gpkg files found in ' + folder_path)

	# Seed the combined frame from the first sorted file rather than a
	# hard-coded path, so the seed can never be loaded a second time by the
	# loop below.
	seed_file = v_files[0]
	combined_df = _load_h3_column(
	    os.path.join(folder_path, seed_file), _column_name(seed_file), h3_res
	)

	# Add one column per remaining GPKG file, aligned on the H3 cell index.
	# The loop body must be indented under the `for`; without it Python raises
	# an IndentationError.
	for file in v_files[1:num_files_to_open]:
	    file_path = os.path.join(folder_path, file)
	    col_name = _column_name(file)  # e.g. '002' for 'vector_002.gpkg'
	    df = _load_h3_column(file_path, col_name, h3_res)
	    # DataFrame.join defaults to a left join on the index, so only cells
	    # present in the seed file are kept.
	    combined_df = combined_df.join(df)