Last active
March 20, 2020 16:35
-
-
Save philshem/a472314e62c987aee68fcd780bdde170 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
'''
Post processing the scrape job

Reads the scraped popularity measurements and the place metadata, keeps the
measurements for a single (day of week, hour of day) slot a couple of hours
in the past, averages them per place, compares the current popularity with
the normal popularity and writes the categorised result to `popularity.csv`.

requires `pip install pandas gcsfs`
'''

from datetime import datetime, timedelta

import numpy as np
import pandas as pd

# Place metadata (one row per place); joined to the measurements on "url".
METADATA_URL = 'https://docs.google.com/spreadsheets/d/1KDqquW2axaUM9Z62JbyppuPq09IpAZRSIpPLb08nVqQ/gviz/tq?tqx=out:csv&sheet=Sheet1'

# Scraped measurements. A local path such as 'all_valid.csv' works as well.
DATA_URL = 'gs://kantonzh-covid-hkfsaqgshw/gmaps_scrape/all_valid.csv'

OUTPUT_PATH = 'popularity.csv'

# The report looks at the slot this many hours before "now".
HOUR_LAG = 2

WEEKDAYS = (
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday",
)

# Columns of the scrape output that the report does not use.
UNUSED_COLUMNS = ['place', 'scrape_time']

GROUP_KEYS = ['url', 'hour_of_day', 'day_of_week']

# (lower bound, upper bound, label); bounds are inclusive on both ends and
# the first matching bin wins, so a value sitting exactly on a shared edge
# falls into the earlier bin.
CATEGORY_BINS = (
    (-99.0, -0.5, 'Much less crowded than usual'),
    (-0.5, -0.25, 'A little less crowded than usual'),
    (-0.25, 0.25, 'About the same'),
    (0.25, 0.5, 'A little more crowded than usual'),
    (0.5, 99.0, 'Much more crowded than usual'),
)


def target_slot(now, lag_hours=HOUR_LAG):
    '''
    Return `(day_name, hour)` for the moment `lag_hours` before `now`.

    The subtraction is done on the full datetime so that shortly after
    midnight the slot rolls back to the previous day (e.g. Monday 01:00
    with a lag of 2 gives `("Sunday", 23)`) instead of a negative hour.
    '''
    moment = now - timedelta(hours=lag_hours)
    return WEEKDAYS[moment.weekday()], moment.hour


def categorize(factor):
    '''
    Map a Series of popularity factors to human readable category labels.

    Values outside every bin (including NaN and +/-inf, e.g. from a normal
    popularity of zero) are labelled 'Unknown'.
    '''
    # `between` is inclusive on both ends by default in every pandas
    # version; the explicit `inclusive=True` flag is rejected by pandas 2.x.
    conditions = [factor.between(low, high) for low, high, _ in CATEGORY_BINS]
    labels = [label for _, _, label in CATEGORY_BINS]
    return np.select(conditions, labels, default='Unknown')


def build_report(metadata, measurements, day, hour):
    '''
    Build the popularity report for one (day of week, hour of day) slot.

    metadata     -- DataFrame with a "url" column plus descriptive columns
    measurements -- DataFrame with "url", "day_of_week", "hour_of_day",
                    "popularity_percent_normal" and
                    "popularity_percent_current" columns
    day, hour    -- the slot to report on, e.g. `("Friday", 15)`

    Returns a new DataFrame with "url" as first column, the remaining
    metadata columns, the averaged measurements, "popularity_factor" and
    "popularity_category", sorted by descending popularity factor. Only
    places present in both inputs are kept (inner join); if the metadata
    lists the same url more than once, the measurement is repeated for each
    metadata row.
    '''
    data = measurements.drop(columns=UNUSED_COLUMNS, errors='ignore')

    # Keep only rows of the requested slot that have a normal popularity.
    in_slot = (
        data['popularity_percent_normal'].notnull()
        & (data['day_of_week'] == day)
        & (data['hour_of_day'] == hour)
    )
    data = data[in_slot]

    # One row per place: average all scrapes that fall into the slot.
    # numeric_only keeps stray text columns from breaking the aggregation.
    data = data.groupby(GROUP_KEYS).mean(numeric_only=True).reset_index()

    # Relative deviation of the current from the normal popularity.
    normal = data['popularity_percent_normal']
    current = data['popularity_percent_current']
    data['popularity_factor'] = (current - normal) / normal
    data['popularity_category'] = categorize(data['popularity_factor'])

    report = pd.merge(metadata, data, on='url')

    # Keep "url" as the leading column regardless of its position in the
    # metadata sheet.
    ordered = ['url'] + [col for col in report.columns if col != 'url']
    report = report[ordered]

    return report.sort_values(by='popularity_factor', ascending=False)


def main():
    '''Download the inputs, build the report and write it to OUTPUT_PATH.'''
    # NOTE(review): datetime.now() is the machine's local time; presumably
    # it is meant to match the timezone of the scraped day/hour values --
    # confirm for the host this runs on.
    day, hour = target_slot(datetime.now())

    metadata = pd.read_csv(METADATA_URL)
    measurements = pd.read_csv(DATA_URL)

    report = build_report(metadata, measurements, day, hour)
    report.to_csv(OUTPUT_PATH, index=False)


if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| url | description | category | importance | location | hour_of_day | day_of_week | popularity_percent_normal | popularity_percent_current | popularity_factor | popularity_category | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| https://goo.gl/maps/w4XMSmdS9J2WL1SU6 | Zürich Triemli (Bahnhof) | Public Transport | 1 | Stadt Zürich | 15 | Friday | 31.0 | 66 | 1.1290322580645162 | Much more crowded than usual | |
| https://goo.gl/maps/PKq4ULD9Ai2if3Gp6 | Coop Supermarkt Zürich Wiedikon | Groceries | 1 | Stadt Zürich | 15 | Friday | 30.0 | 56 | 0.8666666666666667 | Much more crowded than usual | |
| https://goo.gl/maps/bRUiML4YGqmBtYEa9 | Coop Wiedikon | Groceries | 1 | Stadt Zürich | 15 | Friday | 30.0 | 55 | 0.8333333333333334 | Much more crowded than usual | |
| https://goo.gl/maps/oyEE7o8RLVym7LnZ8 | Migros Supermarkt Birmensdorferstrasse | Groceries | 1 | Stadt Zürich | 15 | Friday | 42.0 | 70 | 0.6666666666666666 | Much more crowded than usual | |
| https://goo.gl/maps/5y1K6g2EFkL7wDbZ8 | ALDI Dübendorf | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 51.0 | 69 | 0.35294117647058826 | A little more crowded than usual | |
| https://goo.gl/maps/DEFVXRLyNLT9rH9JA | Migros Supermarkt Winterthur | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 36.0 | 47 | 0.3055555555555556 | A little more crowded than usual | |
| https://goo.gl/maps/7x3qcNkvDLgzfiZh6 | Migros Männedorf | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 56.0 | 71 | 0.26785714285714285 | A little more crowded than usual | |
| https://goo.gl/maps/ar77JYX1DGJaE5aR8 | Coop Winterthur Grüzemarkt | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 45.0 | 49 | 0.08888888888888889 | About the same | |
| https://goo.gl/maps/vsiCjYgUoJU79bDZ6 | Coop Thalwil | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 53.0 | 54 | 0.018867924528301886 | About the same | |
| https://goo.gl/maps/kHLomhtYszTwGh2M6 | Josefwiese | Parks | 1 | Stadt Zürich | 15 | Friday | 37.0 | 36 | -0.02702702702702703 | About the same | |
| https://goo.gl/maps/JzVjikfwASSgSNLP8 | Migros Meilen | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 54.0 | 52 | -0.037037037037037035 | About the same | |
| https://goo.gl/maps/42J9Dm7Hcww8Kwud7 | Migros Uster | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 57.0 | 54 | -0.05263157894736842 | About the same | |
| https://goo.gl/maps/fnDJ3iDrA2w5M89p9 | Coop Stäfa | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 50.0 | 44 | -0.12 | About the same | |
| https://goo.gl/maps/nVxxoQs8puA779Xv7 | Raststätte Affoltern am Albis | Traffic | 2 | Stadt Zürich | 15 | Friday | 46.0 | 39 | -0.15217391304347827 | About the same | |
| https://goo.gl/maps/4KeEQuDGRHnssfDd7 | ALDI Männedorf | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 64.0 | 52 | -0.1875 | About the same | |
| https://goo.gl/maps/pHwZgT5f8Vt6YQtV6 | Coop Küsnacht | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 71.0 | 57 | -0.19718309859154928 | About the same | |
| https://goo.gl/maps/NMJQnhWoro7nftyd9 | Migros Hönggerberg | Groceries | 1 | Stadt Zürich | 15 | Friday | 41.0 | 30 | -0.2682926829268293 | A little less crowded than usual | |
| https://goo.gl/maps/tBY7dVc9FniKDrxV8 | Migros Schmiede Wiedikon | Groceries | 1 | Stadt Zürich | 15 | Friday | 65.0 | 47 | -0.27692307692307694 | A little less crowded than usual | |
| https://goo.gl/maps/tBY7dVc9FniKDrxV8 | Flughafen Zürich | Public Transport | 1 | Stadt Zürich | 15 | Friday | 65.0 | 47 | -0.27692307692307694 | A little less crowded than usual | |
| https://goo.gl/maps/FvF7p7omPevpd6dv8 | Migros Thalwil | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 67.0 | 48 | -0.2835820895522388 | A little less crowded than usual | |
| https://goo.gl/maps/dVb6V16vxA1Ziswe7 | ALDI Winterthur | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 58.0 | 35 | -0.39655172413793105 | A little less crowded than usual | |
| https://goo.gl/maps/mtKdptssyDkt8AzeA | Coop Bahnhofsbrücke | Groceries | 1 | Stadt Zürich | 15 | Friday | 40.0 | 24 | -0.4 | A little less crowded than usual | |
| https://goo.gl/maps/DRKiTGMVtv3RujMp8 | Migro Winterthur | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 42.0 | 24 | -0.42857142857142855 | A little less crowded than usual | |
| https://goo.gl/maps/PpmPVauuKgEBCeEA7 | Uster (Bahnhof) | Public Transport | 1 | nicht Stadt Zürich | 15 | Friday | 37.0 | 20 | -0.4594594594594595 | A little less crowded than usual | |
| https://goo.gl/maps/psAXiBgBsnYm8jC87 | Migros Stäfa | Groceries | 1 | nicht Stadt Zürich | 15 | Friday | 71.0 | 38 | -0.4647887323943662 | A little less crowded than usual | |
| https://goo.gl/maps/C9BfzMCpm3X8Hzyv7 | Wetzikon (Bahnhof) | Public Transport | 1 | nicht Stadt Zürich | 15 | Friday | 59.0 | 30 | -0.4915254237288136 | A little less crowded than usual | |
| https://goo.gl/maps/iFeW6y6nyXAjxciZ8 | Aldi Oerlikon | Groceries | 1 | Stadt Zürich | 15 | Friday | 53.0 | 26 | -0.5094339622641509 | Much less crowded than usual | |
| https://goo.gl/maps/zVQ2yvVtaVTQ3upEA | Lidl Fraumünster | Groceries | 1 | Stadt Zürich | 15 | Friday | 46.0 | 22 | -0.5217391304347826 | Much less crowded than usual | |
| https://goo.gl/maps/ZtojJrJAbkGgrPeJ8 | Juckerhof | Leisure | 2 | nicht Stadt Zürich | 15 | Friday | 20.0 | 8 | -0.6 | Much less crowded than usual | |
| https://goo.gl/maps/qiPeTDvYnELLZA8s7 | Zürich Hardbrücke | Public Transport | 1 | Stadt Zürich | 15 | Friday | 77.0 | 29 | -0.6233766233766234 | Much less crowded than usual | |
| https://goo.gl/maps/AnRYn1F8NfSGLexf7 | Bahnhof Stadelhofen | Public Transport | 1 | Stadt Zürich | 15 | Friday | 79.0 | 28 | -0.6455696202531646 | Much less crowded than usual | |
| https://goo.gl/maps/48LUNgs9FLyYQxsJ8 | Dübendorf (Bahnhof) | Public Transport | 1 | nicht Stadt Zürich | 15 | Friday | 57.0 | 20 | -0.6491228070175439 | Much less crowded than usual | |
| https://g.page/sihlcity-zurich?share | Sihlcity | Leisure | 3 | Stadt Zürich | 15 | Friday | 49.0 | 16 | -0.673469387755102 | Much less crowded than usual | |
| https://goo.gl/maps/NZyFv6ofu6cCqopa8 | Bahnhof Zürich Oerlikon | Public Transport | 1 | Stadt Zürich | 15 | Friday | 86.0 | 27 | -0.686046511627907 | Much less crowded than usual | |
| https://goo.gl/maps/BtAm4Uds3GE5GTZp9 | Winterthur (Bahnhof) | Public Transport | 1 | nicht Stadt Zürich | 15 | Friday | 68.0 | 21 | -0.6911764705882353 | Much less crowded than usual | |
| https://goo.gl/maps/rBHSWqhuwUQf4Tiq8 | Pfister Dübendorf | Shopping | 3 | nicht Stadt Zürich | 15 | Friday | 38.0 | 11 | -0.7105263157894737 | Much less crowded than usual | |
| https://goo.gl/maps/qhaKLXUmp8JnbQtUA | Hauptbahnhof | Public Transport | 1 | Stadt Zürich | 15 | Friday | 63.0 | 18 | -0.7142857142857143 | Much less crowded than usual | |
| https://goo.gl/maps/zZNeNTKEJDQvfFct5 | Zürich Enge (Bahnhof) | Public Transport | 1 | Stadt Zürich | 15 | Friday | 71.0 | 20 | -0.7183098591549296 | Much less crowded than usual | |
| https://goo.gl/maps/Q2x15GxkhnVYBTJB6 | Migros Löwenstrasse | Groceries | 1 | Stadt Zürich | 15 | Friday | 47.0 | 13 | -0.723404255319149 | Much less crowded than usual | |
| https://goo.gl/maps/FkmTFTAzRGRwqA1g9 | Lindenhof | POI | 3 | Stadt Zürich | 15 | Friday | 43.0 | 11 | -0.7441860465116279 | Much less crowded than usual | |
| https://goo.gl/maps/1qdwnsdgVT73dqXb9 | Uetliberg Bahnhof | Public Transport | 1 | Stadt Zürich | 15 | Friday | 31.0 | 1 | -0.967741935483871 | Much less crowded than usual | |
| https://goo.gl/maps/hMhhhdRvPhXVTPsg7 | Ikea Dietlikon | Shopping | 3 | nicht Stadt Zürich | 15 | Friday | 42.0 | 1 | -0.9761904761904762 | Much less crowded than usual |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment