Created
July 18, 2019 06:58
-
-
Save evmcheb/f6e3e45e9c07010e9fe402434d69b6f1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Data gathering\n", | |
"The data is scraped from the WA Police website; the PDF URL formats were found via [the web archive](http://web.archive.org/web/*/https://www.police.wa.gov.au/Traffic/Cameras/Camera-locations).\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import tabula\n", | |
"import pandas as pd\n", | |
"import json\n", | |
"\n", | |
"s ='''https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-25032019-to-31032019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-31122018-to-06012019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-05112018-to-11112018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-03092018-to-09092018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-30072018-to-05082018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-23072018-to-29072018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-18062018-to-24062018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-11062018-to-17062018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-04062018-to-10062018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-28052018-to-03062018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/Speed-Camera-locations-to-14-July-2019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Speed-camera-locations-to-7-July-2019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/Speed-Camera-locations-to-30-June-2019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Speed-Camera-Locations-17062019-to-23062019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/Speed-Camera-locations-to-16-June-2019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/Speed-Camera-locations-to-9-June-2019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/Speed-Camera-locations-to-12-May-2019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-08042019-to-14042019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-01042019-to-07042019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-18032019-to-24032019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-11032019-to-17032019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-04032019-to-10032019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-25022019-to-03032019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-18022019-to-24022019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-11022019-to-17022019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-04022019-to-10022019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-28012019-to-03022019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-21012019-to-27012019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-07012019-to-13012019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-25032019-to-31032019.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-05032018-to-11032018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-26022018-to-04032018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-19022018-to-25022018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-12022018-to-18022018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-05022018-to-11022018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-29012018-to-04022018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-01012018-to-07012018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-24122018-to-30122018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-10122018-to-16122018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-26112018-to-02122018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-19112018-to-25112018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-12112018-to-18112018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-22102018-to-28102018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-15102018-to-21102018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-08102018-to-14102018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-01102018-to-07102018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-24092018-to-30092018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-17092018-to-23092018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-10092018-to-16092018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-20082018-to-26082018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-13082018-to-19082018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-16072018-to-22072018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-09072018-to-15072018.pdf?la=en\n", | |
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-02072018-to-08072018.pdf?la=en'''" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Year substrings whose presence marks a row as a date header.\n", | |
"YEAR_STRINGS = [str(i) for i in range(2000, 2021)]\n", | |
"\n", | |
"\n", | |
"def fix_date(x):\n", | |
"    \"\"\"Drop the leading field from a date header string.\n", | |
"\n", | |
"    If x contains \", \" the second \", \"-separated field is returned;\n", | |
"    otherwise the first space-separated word is removed.\n", | |
"    \"\"\"\n", | |
"    return x.split(\", \")[1] if \", \" in x else ' '.join(x.split(\" \")[1:])\n", | |
"\n", | |
"\n", | |
"# result maps date string -> {street: [suburb, ...]}\n", | |
"result = {}\n", | |
"\n", | |
"for pdf in s.split(\"\\n\"):\n", | |
"    print(pdf)\n", | |
"    df = tabula.read_pdf(pdf, pages='all')\n", | |
"    print(df.columns)\n", | |
"    # The first column header is parsed as the date for the rows that follow.\n", | |
"    date = fix_date(df.columns[0])\n", | |
"\n", | |
"    result[date] = {}\n", | |
"    df.columns = ['st1','su1','st2','su2']\n", | |
"    for index, row in df.iterrows():\n", | |
"        first_cell = row['st1']\n", | |
"        if \"Street Name\" == first_cell:\n", | |
"            continue\n", | |
"        # Only a string cell can be a date header. The isinstance guard avoids\n", | |
"        # a TypeError from `year in nan` when the left-hand cell is empty.\n", | |
"        if isinstance(first_cell, str) and any(year in first_cell for year in YEAR_STRINGS):\n", | |
"            date = fix_date(first_cell)\n", | |
"            if date not in result:\n", | |
"                result[date] = {}\n", | |
"            continue\n", | |
"        pair1, pair2 = (row['st1'], row['su1']), (row['st2'], row['su2'])\n", | |
"        # Each row holds up to two (street, suburb) pairs side by side;\n", | |
"        # a pair is skipped when either half is missing.\n", | |
"        for street, suburb in (pair1, pair2):\n", | |
"            if pd.isna(street) or pd.isna(suburb):\n", | |
"                continue\n", | |
"            suburbs = result[date].setdefault(street, [])\n", | |
"            if suburb not in suburbs:\n", | |
"                suburbs.append(suburb)\n", | |
"        print(date, pair1, pair2)\n", | |
"%store result\n", | |
"\n", | |
"with open(\"out.json\", \"w\") as f:\n", | |
"    json.dump(result, f)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%store -r\n", | |
"import numpy as np\n", | |
"import json\n", | |
"print(result)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Convert each date-string key of `result` into a `datetime` object so that the entries can be sorted chronologically." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import datetime\n", | |
"\n", | |
"dt = datetime.datetime\n", | |
"\n", | |
"# Iterate over a snapshot of the keys: the loop body removes and inserts\n", | |
"# keys, which is not safe while iterating over the dict itself.\n", | |
"for key in list(result):\n", | |
"    # Keys that are already datetime objects are left alone, so this cell\n", | |
"    # can be re-run without strptime failing on a non-string key.\n", | |
"    if isinstance(key, dt):\n", | |
"        continue\n", | |
"    newkey = dt.strptime(key, \"%d %B %Y\")\n", | |
"    print(newkey)\n", | |
"    result[newkey] = result.pop(key)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%store result\n", | |
"print(len(result))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Pre-processing" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Every distinct \"street|suburb\" label seen on any day, in sorted order.\n", | |
"# A set de-duplicates in one pass instead of scanning a growing list\n", | |
"# for every (day, street, suburb) entry.\n", | |
"unique_streets_suburbs = sorted({\n", | |
"    street+\"|\"+suburb\n", | |
"    for day in result\n", | |
"    for street in result[day]\n", | |
"    for suburb in result[day][street]\n", | |
"})\n", | |
"print(len(unique_streets_suburbs))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# sort by date to determine earliest/latest date" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# (key, value) pairs of result in ascending order; tuples compare on the key first.\n", | |
"by_date = list(result.items())\n", | |
"by_date.sort()\n", | |
"print(by_date)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import datetime\n", | |
"earliest_date, latest_date = datetime.datetime(2018, 1, 29, 0, 0), datetime.datetime(2019, 7, 14, 0, 0)\n", | |
"delta = latest_date - earliest_date\n", | |
"print(delta.days)\n", | |
"# +1 so that latest_date itself gets a row: the date range is inclusive.\n", | |
"num_days = delta.days + 1\n", | |
"\n", | |
"# Label -> column index, built once instead of calling list.index() per entry.\n", | |
"column_of = {label: col for col, label in enumerate(unique_streets_suburbs)}\n", | |
"\n", | |
"# answers[i][j] is 1 when label j appears in result for day i (counted from earliest_date).\n", | |
"answers = np.zeros((num_days, len(unique_streets_suburbs)))\n", | |
"# x_values[i] is [days since earliest_date, is_weekend]; (-1, -1) marks a day absent from result.\n", | |
"x_values = np.zeros((num_days, 2))\n", | |
"\n", | |
"for i in range(num_days):\n", | |
"    current_date = earliest_date + datetime.timedelta(days=i)\n", | |
"    if current_date not in result:\n", | |
"        x_values[i] = (-1, -1)\n", | |
"        continue\n", | |
"    # weekday() >= 5 means Saturday or Sunday.\n", | |
"    x_values[i] = (i, 1 if current_date.weekday() >= 5 else 0)\n", | |
"    streets = result[current_date]\n", | |
"    for street in streets:\n", | |
"        for suburb in streets[street]:\n", | |
"            answers[i][column_of[street+\"|\"+suburb]] = 1\n", | |
"print(len(answers[0]))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## TensorFlow" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from __future__ import print_function\n", | |
"import keras\n", | |
"from keras.datasets import mnist\n", | |
"import tensorflow as tf\n", | |
"from keras.models import Sequential, Model\n", | |
"from keras.layers import Dense, Dropout, Flatten\n", | |
"from keras.layers import Conv2D, MaxPooling2D\n", | |
"import tensorflow as tf\n", | |
"from keras.backend.tensorflow_backend import set_session\n", | |
"from keras import backend as K\n", | |
"\n", | |
"# Create a TensorFlow session whose config has gpu_options.allow_growth\n", | |
"# enabled, then hand that session to the Keras backend via set_session.\n", | |
"# NOTE(review): allow_growth presumably makes TF claim GPU memory on demand\n", | |
"# rather than all at once - confirm against the TensorFlow docs.\n", | |
"config = tf.ConfigProto()\n", | |
"config.gpu_options.allow_growth = True\n", | |
"sess = tf.Session(config=config)\n", | |
"set_session(sess)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"batch_size = 14\n", | |
"epochs = 12\n", | |
"out_class = len(answers[0])\n", | |
"\n", | |
"# The last 100 rows are held out for validation; all earlier rows are used for training.\n", | |
"(x_train, y_train), (x_test, y_test) = (x_values[:-100], answers[:-100]), (x_values[-100:], answers[-100:])\n", | |
"print(x_test.shape)\n", | |
"model = Sequential()\n", | |
"# The input shape (2 features per row) is declared once, on the first layer.\n", | |
"# Previously the first layer had no shape and a hidden Dense layer carried a\n", | |
"# misleading input_shape=(2,) even though its real input is the 64-unit layer.\n", | |
"model.add(Dropout(0.1, input_shape=(2,)))\n", | |
"model.add(Dense(64, activation='relu'))\n", | |
"model.add(Dropout(0.3))\n", | |
"model.add(Dense(128, activation='relu'))\n", | |
"model.add(Dropout(0.5))\n", | |
"# One sigmoid output per column of answers, trained with binary cross-entropy.\n", | |
"model.add(Dense(out_class, activation='sigmoid'))\n", | |
"\n", | |
"model.compile(loss=keras.losses.binary_crossentropy,\n", | |
"              optimizer=keras.optimizers.RMSprop(),\n", | |
"              metrics=['accuracy'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Keyword arguments for the training run, gathered in one place.\n", | |
"fit_options = dict(\n", | |
"    batch_size=batch_size,\n", | |
"    epochs=12,\n", | |
"    verbose=1,\n", | |
"    validation_data=(x_test, y_test),\n", | |
")\n", | |
"# Train on the training split; the returned history is kept for plotting.\n", | |
"history = model.fit(x_train, y_train, **fit_options)\n", | |
"model.summary()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import matplotlib.pyplot as plt\n", | |
"plt.clf()\n", | |
"history_dict = history.history\n", | |
"loss_values, val_loss = history_dict['loss'], history_dict['val_loss']\n", | |
"# x axis: 1..number of recorded epochs, sized from the loss series itself.\n", | |
"# Named epoch_axis so the `epochs` hyper-parameter is not overwritten.\n", | |
"epoch_axis = range(1, len(loss_values)+1)\n", | |
"\n", | |
"plt.plot(epoch_axis, loss_values, 'bo', label=\"Training loss\")\n", | |
"plt.plot(epoch_axis, val_loss, 'b', label=\"Validation loss\")\n", | |
"plt.title(\"Training and validation loss\")\n", | |
"plt.xlabel('Epochs')\n", | |
"plt.ylabel('Loss')\n", | |
"# Without a legend the label= arguments above are never displayed.\n", | |
"plt.legend()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.clf()\n", | |
"history_dict = history.history\n", | |
"# This notebook reads the accuracy series under 'acc'; fall back to\n", | |
"# 'accuracy' if that key is absent (NOTE(review): newer Keras releases\n", | |
"# reportedly use the longer name - confirm for the installed version).\n", | |
"acc_key = 'acc' if 'acc' in history_dict else 'accuracy'\n", | |
"acc_values, val_acc = history_dict[acc_key], history_dict['val_' + acc_key]\n", | |
"# Named epoch_axis so the `epochs` hyper-parameter is not overwritten.\n", | |
"epoch_axis = range(1, len(acc_values)+1)\n", | |
"\n", | |
"plt.plot(epoch_axis, acc_values, 'bo', label=\"Training acc\")\n", | |
"plt.plot(epoch_axis, val_acc, 'b', label=\"Validation acc\")\n", | |
"plt.title(\"Training and validation acc\")\n", | |
"plt.xlabel('Epochs')\n", | |
"plt.ylabel('Accuracy')\n", | |
"# Without a legend the label= arguments above are never displayed.\n", | |
"plt.legend()\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Predicting" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"print(x_test[-1])\n", | |
"# Single source of truth for the day being predicted, counted from\n", | |
"# 29 Jan 2018. Previously the model was fed day 530 while the printed date\n", | |
"# was computed for day 532; both now derive from day_offset.\n", | |
"# NOTE(review): 532 is kept because it matches the printed date and the\n", | |
"# is_weekend=0 flag of the earlier version - confirm this is the intended day.\n", | |
"day_offset = 532\n", | |
"new_dt = datetime.datetime(2018, 1, 29, 0, 0) + datetime.timedelta(days=day_offset)\n", | |
"\n", | |
"# One feature row: [day offset, is_weekend].\n", | |
"test = np.zeros((1,2,))\n", | |
"test[0][0] = day_offset\n", | |
"test[0][1] = 1 if new_dt.weekday() >= 5 else 0\n", | |
"print(test.shape)\n", | |
"pred = model.predict(test, batch_size=14, verbose=1)\n", | |
"print(pred.shape)\n", | |
"# Column with the highest predicted value (first one on ties).\n", | |
"index = int(np.argmax(pred[0]))\n", | |
"largest = pred[0][index]\n", | |
"\n", | |
"print(new_dt)\n", | |
"print(unique_streets_suburbs[index] +\": \"+ str(largest))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment