Created
June 5, 2021 11:23
-
-
Save davidbradway/61154ea21354b5935a0429a39fdf40c7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-time setup (run in a shell):
#   conda create -n pd python
#   conda activate pd        # or: source activate pd (older conda releases)
#   conda install pandas
#   conda install matplotlib
# To remove the pd environment afterwards:
#   conda env remove -n pd
import json | |
import os | |
import datetime as dt | |
import pandas as pd | |
from matplotlib import pyplot as plt | |
# Folder that holds the exported history, relative to the current working
# directory (presumably a Google Takeout "Location History" export).
HISTORY_ROOT = os.path.join('Location History', 'Semantic Location History')

# One international mile is exactly 1609.344 m.  The previous conversion
# (/1.6/1000.) treated 'distance' as metres but rounded the kilometre factor,
# overstating every trip by about 0.6 %.
METERS_PER_MILE = 1609.344

# Parallel lists: element i of a "when" list is the raw 'startTimestampMs'
# value of the trip whose length in miles is element i of the matching list.
car_trips = []
when = []
bike_trips = []
when_bike = []

# Activity types that are collected, and the pair of lists each one feeds.
TRIP_LISTS = {
    'IN_PASSENGER_VEHICLE': (car_trips, when),
    'CYCLING': (bike_trips, when_bike),
}


def trip_from_timeline_object(timeline_object):
    """Extract one trip from an entry of a file's 'timelineObjects' list.

    Returns a tuple ``(activity_type, miles, start_timestamp_ms)`` when the
    entry has an 'activitySegment' with a string 'activityType', a numeric
    'distance' and a 'duration' containing 'startTimestampMs'.  Returns None
    for every other entry, so one incomplete segment is skipped on its own
    instead of aborting the rest of the file, and a distance is never
    recorded without its timestamp.
    """
    if not isinstance(timeline_object, dict):
        return None
    segment = timeline_object.get('activitySegment')
    if not isinstance(segment, dict):
        return None

    activity = segment.get('activityType')
    distance = segment.get('distance')
    duration = segment.get('duration')
    start_ms = duration.get('startTimestampMs') if isinstance(duration, dict) else None

    if not isinstance(activity, str):
        return None
    if not isinstance(distance, (int, float)) or start_ms is None:
        return None
    return activity, distance / METERS_PER_MILE, start_ms


def read_timeline_objects(path):
    """Return the 'timelineObjects' list stored in the JSON file at *path*.

    Unreadable or malformed files are reported on stdout (with the file name)
    and yield an empty list, so the remaining files are still processed.
    A file without a 'timelineObjects' list also yields an empty list.
    """
    try:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(path, encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:  # ValueError covers JSON/Unicode decode errors
        print('{}: {}'.format(path, e))
        return []
    timeline_objects = data.get('timelineObjects') if isinstance(data, dict) else None
    return timeline_objects if isinstance(timeline_objects, list) else []


def find_history_files(root):
    """Return the paths of all '*.json' files one directory level below *root*."""
    paths = []
    for directory in os.listdir(root):
        dir_path = os.path.join(root, directory)
        if not os.path.isdir(dir_path):
            # A plain file next to the sub-folders would make os.listdir() raise.
            continue
        for filename in os.listdir(dir_path):
            if filename.endswith('.json'):
                paths.append(os.path.join(dir_path, filename))
    return paths


if os.path.isdir(HISTORY_ROOT):
    for history_file in find_history_files(HISTORY_ROOT):
        for timeline_object in read_timeline_objects(history_file):
            trip = trip_from_timeline_object(timeline_object)
            if trip is None:
                continue
            activity, miles, start_ms = trip
            if activity in TRIP_LISTS:
                trips, starts = TRIP_LISTS[activity]
                trips.append(miles)
                starts.append(start_ms)
else:
    # Most likely the script was started from the wrong working directory.
    print('Location history folder not found: {}'.format(HISTORY_ROOT))
def daily_miles(miles, timestamps_ms):
    """Build the per-trip and per-day tables for one kind of trip.

    miles          -- trip lengths in miles, one per trip
    timestamps_ms  -- trip start times in milliseconds since the epoch
                      (numbers or numeric strings), parallel to *miles*

    Returns ``(trips, daily)``:
      trips -- DataFrame with columns 'Miles', 'timestampMs' (seconds, kept
               under its historical name) and 'datetime'
      daily -- DataFrame with one column 'Daily Miles': the sum of 'Miles'
               per calendar day, indexed by daily period

    Days are determined with datetime.fromtimestamp(), i.e. in the local
    time zone of the machine running the script.

    Raises ValueError when the two inputs differ in length, rather than
    silently pairing distances with the wrong timestamps.  Empty inputs are
    valid and produce empty tables.
    """
    if len(miles) != len(timestamps_ms):
        raise ValueError('got {} distances but {} timestamps'.format(
            len(miles), len(timestamps_ms)))

    seconds = [float(ms) / 1000. for ms in timestamps_ms]
    trips = pd.DataFrame({'Miles': pd.Series(miles, dtype=float)})
    trips['timestampMs'] = pd.Series(seconds, dtype=float)
    # pd.to_datetime keeps a datetime dtype even for an empty list, so the
    # .dt accessor below works when there are no trips at all.
    trips['datetime'] = pd.to_datetime([dt.datetime.fromtimestamp(s) for s in seconds])

    per_day = trips['Miles'].groupby(trips['datetime'].dt.to_period('D')).sum()
    return trips, per_day.to_frame('Daily Miles')


def plot_daily_histogram(daily, verb):
    """Show a 50-bin histogram of daily['Daily Miles'] and return its median.

    *verb* completes the labels, e.g. 'Driven' -> 'Miles Driven per Day'.
    The median is marked with a dashed yellow vertical line.  plt.show()
    is called, which blocks until the window is closed on interactive
    backends.
    """
    median = daily['Daily Miles'].median()
    daily.hist(bins=50)
    plt.xlabel('Miles {} per Day'.format(verb))
    plt.ylabel('Number of Days')
    plt.title('Histogram of Miles {} per Day'.format(verb))
    plt.axvline(median, color='y', linestyle='dashed', linewidth=1)
    plt.show()
    return median


# Car trips: df holds one row per trip, df1 one row per day.
df, df1 = daily_miles(car_trips, when)
if df1.empty:
    print('No car trips found; skipping the driving histogram.')
else:
    # The bare `median()` expression only displayed a value in a notebook;
    # print it so the script reports it as well.
    print('Median miles driven per day:', plot_daily_histogram(df1, 'Driven'))

# Bike trips: df2 holds one row per trip, df3 one row per day.
df2, df3 = daily_miles(bike_trips, when_bike)
if df3.empty:
    print('No bike trips found; skipping the cycling histogram.')
else:
    print('Median miles biked per day:', plot_daily_histogram(df3, 'Biked'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment