Skip to content

Instantly share code, notes, and snippets.

@philshem
Last active March 20, 2020 16:35
Show Gist options
  • Select an option

  • Save philshem/a472314e62c987aee68fcd780bdde170 to your computer and use it in GitHub Desktop.

Select an option

Save philshem/a472314e62c987aee68fcd780bdde170 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
'''
Post processing the scrape job
requires `pip install pandas gcsfs`
'''
import pandas as pd
import numpy as np
from datetime import datetime
# Google Sheet (CSV export) describing every scraped place; it is joined to
# the measurements on the "url" column.
metadata = 'https://docs.google.com/spreadsheets/d/1KDqquW2axaUM9Z62JbyppuPq09IpAZRSIpPLb08nVqQ/gviz/tq?tqx=out:csv&sheet=Sheet1'
# CSV of scraped measurements (reading gs:// paths requires gcsfs).
url = 'gs://kantonzh-covid-hkfsaqgshw/gmaps_scrape/all_valid.csv'

# Day names in the order returned by datetime.weekday() (Monday == 0).
WEEKDAYS = (
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday",
)
# The report looks at the hour that lies this many hours in the past.
LAG_HOURS = 2

# Category labels, ordered from the lowest to the highest popularity factor.
POPULARITY_LABELS = [
    'Much less crowded than usual',
    'A little less crowded than usual',
    'About the same',
    'A little more crowded than usual',
    'Much more crowded than usual',
]


def reference_slot(now, lag_hours=LAG_HOURS):
    """Return ``(day_of_week, hour_of_day)`` for ``lag_hours`` before ``now``.

    The lag is subtracted from the full timestamp rather than from the hour
    alone, so a run shortly after midnight correctly resolves to the late
    hours of the *previous* weekday instead of a negative hour.

    Args:
        now: A ``datetime`` (or anything ``pd.Timestamp`` accepts).
        lag_hours: How many hours to look back.

    Returns:
        A tuple of the English weekday name and the hour (0-23).
    """
    moment = pd.Timestamp(now) - pd.Timedelta(hours=lag_hours)
    return WEEKDAYS[moment.weekday()], moment.hour


def categorize_popularity(factor):
    """Map relative popularity changes to human-readable categories.

    Bins (upper bounds inclusive), evaluated first-match-wins:
    ``<= -0.5``, ``<= -0.25``, ``<= 0.25``, ``<= 0.5`` and ``> 0.5``.
    The outer bins are open-ended, so extreme or infinite factors are still
    classified; only NaN (no comparison succeeds) yields ``'Unknown'``.

    Args:
        factor: A pandas Series of ``(current - normal) / normal`` values.

    Returns:
        A NumPy array of category labels aligned with ``factor``.
    """
    conditions = [
        factor <= -0.5,
        factor <= -0.25,
        factor <= 0.25,
        factor <= 0.5,
        factor > 0.5,
    ]
    return np.select(conditions, POPULARITY_LABELS, default='Unknown')


def compute_popularity(df, md, day_of_week, hour_of_day):
    """Build the popularity report for one weekday/hour slot.

    Args:
        df: Raw measurements with at least the columns ``url``,
            ``day_of_week``, ``hour_of_day``, ``popularity_percent_normal``
            and ``popularity_percent_current``.
        md: Place metadata containing a ``url`` column.
        day_of_week: Weekday name to keep (e.g. ``"Friday"``).
        hour_of_day: Hour of day to keep.

    Returns:
        A new DataFrame (inputs are not modified) with the metadata joined to
        the per-place mean popularity, the relative change
        (``popularity_factor``) and its category, sorted by factor descending.
    """
    # These columns are not part of the report.
    df = df.drop(columns=['place', 'scrape_time'], errors='ignore')

    # Keep only rows of the requested slot that have a "normal" value.
    wanted = (
        df['popularity_percent_normal'].notnull()
        & (df['day_of_week'] == day_of_week)
        & (df['hour_of_day'] == hour_of_day)
    )
    # One row per place/slot. numeric_only=True keeps the aggregation working
    # on pandas versions that no longer silently drop non-numeric columns.
    grouped = (
        df[wanted]
        .groupby(['url', 'hour_of_day', 'day_of_week'])
        .mean(numeric_only=True)
    )

    # Ratio between current and normal popularity, relative to normal.
    normal = grouped['popularity_percent_normal']
    grouped['popularity_factor'] = (
        grouped['popularity_percent_current'] - normal
    ) / normal
    grouped['popularity_category'] = categorize_popularity(
        grouped['popularity_factor']
    )
    grouped = grouped.reset_index()

    # Put "url" first in the metadata so it leads the output columns.
    md = md[['url'] + [col for col in md.columns if col != 'url']]

    # Join metadata and data; places without a measurement are left out.
    report = pd.merge(md, grouped, on='url')
    return report.sort_values(by='popularity_factor', ascending=False)


def main():
    """Download metadata and measurements and write ``popularity.csv``."""
    # NOTE(review): datetime.now() is naive local time; this presumably has to
    # match the timezone of the scraped hour_of_day values - confirm.
    jetzt = datetime.now()
    today, last_hour = reference_slot(jetzt)

    md = pd.read_csv(metadata)
    df = pd.read_csv(url)

    report = compute_popularity(df, md, today, last_hour)
    report.to_csv('popularity.csv', index=False)


if __name__ == '__main__':
    main()
url description category importance location hour_of_day day_of_week popularity_percent_normal popularity_percent_current popularity_factor popularity_category
https://goo.gl/maps/w4XMSmdS9J2WL1SU6 Zürich Triemli (Bahnhof) Public Transport 1 Stadt Zürich 15 Friday 31.0 66 1.1290322580645162 Much more crowded than usual
https://goo.gl/maps/PKq4ULD9Ai2if3Gp6 Coop Supermarkt Zürich Wiedikon Groceries 1 Stadt Zürich 15 Friday 30.0 56 0.8666666666666667 Much more crowded than usual
https://goo.gl/maps/bRUiML4YGqmBtYEa9 Coop Wiedikon Groceries 1 Stadt Zürich 15 Friday 30.0 55 0.8333333333333334 Much more crowded than usual
https://goo.gl/maps/oyEE7o8RLVym7LnZ8 Migros Supermarkt Birmensdorferstrasse Groceries 1 Stadt Zürich 15 Friday 42.0 70 0.6666666666666666 Much more crowded than usual
https://goo.gl/maps/5y1K6g2EFkL7wDbZ8 ALDI Dübendorf Groceries 1 nicht Stadt Zürich 15 Friday 51.0 69 0.35294117647058826 A little more crowded than usual
https://goo.gl/maps/DEFVXRLyNLT9rH9JA Migros Supermarkt Winterthur Groceries 1 nicht Stadt Zürich 15 Friday 36.0 47 0.3055555555555556 A little more crowded than usual
https://goo.gl/maps/7x3qcNkvDLgzfiZh6 Migros Männedorf Groceries 1 nicht Stadt Zürich 15 Friday 56.0 71 0.26785714285714285 A little more crowded than usual
https://goo.gl/maps/ar77JYX1DGJaE5aR8 Coop Winterthur Grüzemarkt Groceries 1 nicht Stadt Zürich 15 Friday 45.0 49 0.08888888888888889 About the same
https://goo.gl/maps/vsiCjYgUoJU79bDZ6 Coop Thalwil Groceries 1 nicht Stadt Zürich 15 Friday 53.0 54 0.018867924528301886 About the same
https://goo.gl/maps/kHLomhtYszTwGh2M6 Josefwiese Parks 1 Stadt Zürich 15 Friday 37.0 36 -0.02702702702702703 About the same
https://goo.gl/maps/JzVjikfwASSgSNLP8 Migros Meilen Groceries 1 nicht Stadt Zürich 15 Friday 54.0 52 -0.037037037037037035 About the same
https://goo.gl/maps/42J9Dm7Hcww8Kwud7 Migros Uster Groceries 1 nicht Stadt Zürich 15 Friday 57.0 54 -0.05263157894736842 About the same
https://goo.gl/maps/fnDJ3iDrA2w5M89p9 Coop Stäfa Groceries 1 nicht Stadt Zürich 15 Friday 50.0 44 -0.12 About the same
https://goo.gl/maps/nVxxoQs8puA779Xv7 Raststätte Affoltern am Albis Traffic 2 Stadt Zürich 15 Friday 46.0 39 -0.15217391304347827 About the same
https://goo.gl/maps/4KeEQuDGRHnssfDd7 ALDI Männedorf Groceries 1 nicht Stadt Zürich 15 Friday 64.0 52 -0.1875 About the same
https://goo.gl/maps/pHwZgT5f8Vt6YQtV6 Coop Küsnacht Groceries 1 nicht Stadt Zürich 15 Friday 71.0 57 -0.19718309859154928 About the same
https://goo.gl/maps/NMJQnhWoro7nftyd9 Migros Hönggerberg Groceries 1 Stadt Zürich 15 Friday 41.0 30 -0.2682926829268293 A little less crowded than usual
https://goo.gl/maps/tBY7dVc9FniKDrxV8 Migros Schmiede Wiedikon Groceries 1 Stadt Zürich 15 Friday 65.0 47 -0.27692307692307694 A little less crowded than usual
https://goo.gl/maps/tBY7dVc9FniKDrxV8 Flughafen Zürich Public Transport 1 Stadt Zürich 15 Friday 65.0 47 -0.27692307692307694 A little less crowded than usual
https://goo.gl/maps/FvF7p7omPevpd6dv8 Migros Thalwil Groceries 1 nicht Stadt Zürich 15 Friday 67.0 48 -0.2835820895522388 A little less crowded than usual
https://goo.gl/maps/dVb6V16vxA1Ziswe7 ALDI Winterthur Groceries 1 nicht Stadt Zürich 15 Friday 58.0 35 -0.39655172413793105 A little less crowded than usual
https://goo.gl/maps/mtKdptssyDkt8AzeA Coop Bahnhofsbrücke Groceries 1 Stadt Zürich 15 Friday 40.0 24 -0.4 A little less crowded than usual
https://goo.gl/maps/DRKiTGMVtv3RujMp8 Migro Winterthur Groceries 1 nicht Stadt Zürich 15 Friday 42.0 24 -0.42857142857142855 A little less crowded than usual
https://goo.gl/maps/PpmPVauuKgEBCeEA7 Uster (Bahnhof) Public Transport 1 nicht Stadt Zürich 15 Friday 37.0 20 -0.4594594594594595 A little less crowded than usual
https://goo.gl/maps/psAXiBgBsnYm8jC87 Migros Stäfa Groceries 1 nicht Stadt Zürich 15 Friday 71.0 38 -0.4647887323943662 A little less crowded than usual
https://goo.gl/maps/C9BfzMCpm3X8Hzyv7 Wetzikon (Bahnhof) Public Transport 1 nicht Stadt Zürich 15 Friday 59.0 30 -0.4915254237288136 A little less crowded than usual
https://goo.gl/maps/iFeW6y6nyXAjxciZ8 Aldi Oerlikon Groceries 1 Stadt Zürich 15 Friday 53.0 26 -0.5094339622641509 Much less crowded than usual
https://goo.gl/maps/zVQ2yvVtaVTQ3upEA Lidl Fraumünster Groceries 1 Stadt Zürich 15 Friday 46.0 22 -0.5217391304347826 Much less crowded than usual
https://goo.gl/maps/ZtojJrJAbkGgrPeJ8 Juckerhof Leisure 2 nicht Stadt Zürich 15 Friday 20.0 8 -0.6 Much less crowded than usual
https://goo.gl/maps/qiPeTDvYnELLZA8s7 Zürich Hardbrücke Public Transport 1 Stadt Zürich 15 Friday 77.0 29 -0.6233766233766234 Much less crowded than usual
https://goo.gl/maps/AnRYn1F8NfSGLexf7 Bahnhof Stadelhofen Public Transport 1 Stadt Zürich 15 Friday 79.0 28 -0.6455696202531646 Much less crowded than usual
https://goo.gl/maps/48LUNgs9FLyYQxsJ8 Dübendorf (Bahnhof) Public Transport 1 nicht Stadt Zürich 15 Friday 57.0 20 -0.6491228070175439 Much less crowded than usual
https://g.page/sihlcity-zurich?share Sihlcity Leisure 3 Stadt Zürich 15 Friday 49.0 16 -0.673469387755102 Much less crowded than usual
https://goo.gl/maps/NZyFv6ofu6cCqopa8 Bahnhof Zürich Oerlikon Public Transport 1 Stadt Zürich 15 Friday 86.0 27 -0.686046511627907 Much less crowded than usual
https://goo.gl/maps/BtAm4Uds3GE5GTZp9 Winterthur (Bahnhof) Public Transport 1 nicht Stadt Zürich 15 Friday 68.0 21 -0.6911764705882353 Much less crowded than usual
https://goo.gl/maps/rBHSWqhuwUQf4Tiq8 Pfister Dübendorf Shopping 3 nicht Stadt Zürich 15 Friday 38.0 11 -0.7105263157894737 Much less crowded than usual
https://goo.gl/maps/qhaKLXUmp8JnbQtUA Hauptbahnhof Public Transport 1 Stadt Zürich 15 Friday 63.0 18 -0.7142857142857143 Much less crowded than usual
https://goo.gl/maps/zZNeNTKEJDQvfFct5 Zürich Enge (Bahnhof) Public Transport 1 Stadt Zürich 15 Friday 71.0 20 -0.7183098591549296 Much less crowded than usual
https://goo.gl/maps/Q2x15GxkhnVYBTJB6 Migros Löwenstrasse Groceries 1 Stadt Zürich 15 Friday 47.0 13 -0.723404255319149 Much less crowded than usual
https://goo.gl/maps/FkmTFTAzRGRwqA1g9 Lindenhof POI 3 Stadt Zürich 15 Friday 43.0 11 -0.7441860465116279 Much less crowded than usual
https://goo.gl/maps/1qdwnsdgVT73dqXb9 Uetliberg Bahnhof Public Transport 1 Stadt Zürich 15 Friday 31.0 1 -0.967741935483871 Much less crowded than usual
https://goo.gl/maps/hMhhhdRvPhXVTPsg7 Ikea Dietlikon Shopping 3 nicht Stadt Zürich 15 Friday 42.0 1 -0.9761904761904762 Much less crowded than usual
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment