Created
February 10, 2025 22:33
-
-
Save grujdin/2292b0f3187a71e74ab08c7c0f3d1601 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import necessary libraries | |
import pandas as pd | |
from calendar import monthrange | |
# Load the Excel file containing disaster data into a pandas DataFrame | |
df = pd.read_excel('D:/ProjDB/GAUL/public_emdat_GDIS_GAUL_FIDs.xlsx') | |
# Define a function to format the start and end dates into ISO 8601 format | |
def format_dates(row): | |
""" | |
Extracts Start Date and End Date as separate string columns in ISO 8601 format. | |
""" | |
# If 'Start Day' is missing, default to the 1st day of the month | |
start_day = int(row['Start Day']) if not pd.isna(row['Start Day']) else 1 | |
start_date = f"{row['Start Year']}-{row['Start Month']:02d}-{start_day:02d}T00:00:00Z" | |
# If 'End Day' is missing, determine the last day of the 'End Month' | |
if pd.isna(row['End Day']): | |
last_day = monthrange(row['End Year'], row['End Month'])[1] | |
end_day = last_day | |
else: | |
end_day = int(row['End Day']) | |
end_date = f"{row['End Year']}-{row['End Month']:02d}-{end_day:02d}T00:00:00Z" | |
# Return start_date and end_date as a tuple | |
return pd.Series([start_date, end_date]) | |
# Apply the function to each row and create two new columns | |
df[['Start Date', 'End Date']] = df.apply(format_dates, axis=1) | |
# Select specific columns for the output DataFrame, including the new 'Start Date' and 'End Date' columns | |
output_df = df[['Unique Code', 'DisNo.', 'Classification Key', 'Start Date', 'End Date', | |
'External IDs', 'FID_1', 'adm1_code', 'adm1_name', 'FID_2', 'adm2_code', 'adm2_name']] | |
# Rename columns to match required names if needed | |
output_df.columns = ['Unique Code', 'DisNo.', 'Classification Key', 'Start Date', 'End Date', | |
'External IDs', 'FID_1', 'adm1_code', 'adm1_name', 'FID_2', 'adm2_code', 'adm2_name'] | |
# Write the processed data to a new Excel file without including row indices | |
output_df.to_excel('D:/ProjDB/API/API_Request_Data.xlsx', index=False) | |
print("Data successfully saved to 'API_Request_Data.xlsx'.") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment