Last active
March 27, 2024 14:15
-
-
Save aialenti/d7215cc3de25af8b8dfec4fa3a181637 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ast
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
def read_dataframes(data_dir="."):
    """
    Read the three CSV exports used by this script into pandas DataFrames.

    Args:
        data_dir (str | os.PathLike): Directory containing
            'activity_weights.csv', 'activity_details.csv' and
            'daily_summaries_filtered.csv'. Defaults to the current working
            directory, which is where the files were looked up before this
            parameter existed.

    Returns:
        tuple: Three pandas DataFrames, in this order: activity weights,
        activity details, filtered daily summaries.

    Raises:
        FileNotFoundError: If one of the three files does not exist
            (propagated from ``pd.read_csv``).
    """
    base = Path(data_dir)
    activity_weights_df = pd.read_csv(base / 'activity_weights.csv')
    activity_details_df = pd.read_csv(base / 'activity_details.csv')
    daily_summaries_filtered_df = pd.read_csv(base / 'daily_summaries_filtered.csv')
    return activity_weights_df, activity_details_df, daily_summaries_filtered_df
def find_max_weights(activity_weights_df):
    """
    Find the maximum weight for each exercise across all activities.

    Each non-missing cell of the 'wkt_step_weights' column is parsed with
    ``ast.literal_eval`` and must evaluate to a dict mapping an exercise to a
    sequence of weights.

    Args:
        activity_weights_df (pd.DataFrame): Dataframe with a
            'wkt_step_weights' column.

    Returns:
        dict: Exercises as keys and their maximum weight as values. Maxima are
        floored at 0, so an exercise whose weight lists are all empty (or all
        negative) is reported as 0.

    Raises:
        ValueError, SyntaxError: If a cell is not a valid Python literal
            (propagated from ``ast.literal_eval``).
    """
    max_weights = {}
    # Missing cells are skipped: literal_eval cannot parse NaN/None, and an
    # activity without recorded weights has nothing to contribute.
    for raw_weights in activity_weights_df['wkt_step_weights'].dropna():
        weights_by_exercise = ast.literal_eval(raw_weights)
        for exercise, weights in weights_by_exercise.items():
            # default=0 keeps an empty weight list from raising ValueError
            # and agrees with the 0 floor used for the running maximum.
            activity_best = max(weights, default=0)
            max_weights[exercise] = max(max_weights.get(exercise, 0), activity_best)
    return max_weights
def aggregate_activity_data(activity_details_df, activity_weights_df):
    """
    Merge activity details with activity weights on 'activityId' and compute
    per-activity-name aggregates.

    Only activities present in both dataframes are aggregated (inner join).
    Neither input dataframe is modified.

    Args:
        activity_details_df (pd.DataFrame): Activity details. The columns read
            here are 'activityId', 'Activity Name', 'Start Date',
            'Moving Duration', 'Duration', 'Average HR', 'Calories',
            'Total Sets' and 'Total Reps'.
        activity_weights_df (pd.DataFrame): Activity weights, with an
            'activityId' column.

    Returns:
        dict: Activity names as keys; each value is a dict with the keys
        'Average Moving Duration', 'Moving Duration/Duration Ratio',
        'Average HR', 'Average Calories', 'Average Total Sets',
        'Average Total Reps', 'Total Sessions' and 'Frequency per Week'.
    """
    # Parse 'Start Date' on a copy so the caller's dataframe keeps its
    # original column instead of being overwritten in place.
    details_df = activity_details_df.assign(
        **{'Start Date': pd.to_datetime(activity_details_df['Start Date'])}
    )

    merged_df = pd.merge(details_df, activity_weights_df, on="activityId", how="inner")

    activity_aggregates_with_frequency = {}
    for name, group in merged_df.groupby('Activity Name'):
        total_sessions = len(group)
        start_dates = group['Start Date']
        weeks_spanned = (start_dates.max() - start_dates.min()).days / 7
        # Spans shorter than one week are counted as one week, which also
        # avoids dividing by zero for a single session.
        frequency_per_week = total_sessions / max(weeks_spanned, 1)

        # A zero 'Duration' would turn the ratio into inf and poison the
        # mean; masking it to NaN makes mean() skip that session instead.
        duration = group['Duration']
        moving_ratio = group['Moving Duration'] / duration.where(duration != 0)

        activity_aggregates_with_frequency[name] = {
            'Average Moving Duration': group['Moving Duration'].mean(),
            'Moving Duration/Duration Ratio': moving_ratio.mean(),
            'Average HR': group['Average HR'].mean(),
            'Average Calories': group['Calories'].mean(),
            'Average Total Sets': group['Total Sets'].mean(),
            'Average Total Reps': group['Total Reps'].mean(),
            'Total Sessions': total_sessions,
            'Frequency per Week': frequency_per_week,
        }
    return activity_aggregates_with_frequency
def calculate_filtered_medians(df, min_sleeping_seconds=5000):
    """
    Calculate column medians of a DataFrame after dropping rows with too
    little recorded sleep.

    Args:
        df (pd.DataFrame): Input DataFrame. It must have a 'sleepingSeconds'
            column; it is expected to also carry 'totalKilocalories',
            'activeKilocalories' and 'restingHeartRate'.
        min_sleeping_seconds (int | float): Rows whose 'sleepingSeconds' is
            below this value are excluded. Defaults to 5000. Rows with a
            missing 'sleepingSeconds' are excluded too, because the
            comparison is False for NaN.

    Returns:
        pd.Series: Median of every numeric column of the filtered rows,
        indexed by column name. Non-numeric columns are left out.
    """
    filtered_df = df[df['sleepingSeconds'] >= min_sleeping_seconds]
    # numeric_only=True: since pandas 2.0 a plain median() raises TypeError
    # as soon as the frame contains a non-numeric column such as a date.
    return filtered_df.median(numeric_only=True)
# Script entry point. The guard keeps the CSV reads and the prints from
# running when this file is imported; running it directly behaves as before.
if __name__ == "__main__":
    activity_weights_df, activity_details_df, daily_summaries_filtered_df = read_dataframes()

    # Heaviest weight recorded per exercise.
    max_weights = find_max_weights(activity_weights_df)
    print(max_weights)

    # Per-activity-name averages and weekly frequency.
    activity_aggregates = aggregate_activity_data(activity_details_df, activity_weights_df)
    print(activity_aggregates)

    # Medians of the daily summaries, ignoring days with too little sleep.
    medians = calculate_filtered_medians(daily_summaries_filtered_df)
    print(medians)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment