Skip to content

Instantly share code, notes, and snippets.

@evmcheb
Created July 18, 2019 06:58
Show Gist options
  • Save evmcheb/f6e3e45e9c07010e9fe402434d69b6f1 to your computer and use it in GitHub Desktop.
Save evmcheb/f6e3e45e9c07010e9fe402434d69b6f1 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data gathering\n",
"Camera-location PDFs are scraped from the WA Police website. The PDF URL formats were found via [the web archive](http://web.archive.org/web/*/https://www.police.wa.gov.au/Traffic/Cameras/Camera-locations)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import tabula\n",
"import pandas as pd\n",
"import json\n",
"\n",
"s ='''https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-25032019-to-31032019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-31122018-to-06012019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-05112018-to-11112018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-03092018-to-09092018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-30072018-to-05082018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-23072018-to-29072018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-18062018-to-24062018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-11062018-to-17062018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-04062018-to-10062018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-28052018-to-03062018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/Speed-Camera-locations-to-14-July-2019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Speed-camera-locations-to-7-July-2019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/Speed-Camera-locations-to-30-June-2019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Speed-Camera-Locations-17062019-to-23062019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/Speed-Camera-locations-to-16-June-2019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/Speed-Camera-locations-to-9-June-2019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/Speed-Camera-locations-to-12-May-2019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-08042019-to-14042019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-01042019-to-07042019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-18032019-to-24032019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-11032019-to-17032019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-04032019-to-10032019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-25022019-to-03032019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-18022019-to-24022019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-11022019-to-17022019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-04022019-to-10022019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-28012019-to-03022019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-21012019-to-27012019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-07012019-to-13012019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-25032019-to-31032019.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-05032018-to-11032018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-26022018-to-04032018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-19022018-to-25022018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-12022018-to-18022018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-05022018-to-11022018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-29012018-to-04022018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-01012018-to-07012018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-24122018-to-30122018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-10122018-to-16122018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-26112018-to-02122018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-19112018-to-25112018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-12112018-to-18112018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-22102018-to-28102018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-15102018-to-21102018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-08102018-to-14102018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-01102018-to-07102018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-24092018-to-30092018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-17092018-to-23092018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-10092018-to-16092018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-20082018-to-26082018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-13082018-to-19082018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-16072018-to-22072018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-09072018-to-15072018.pdf?la=en\n",
"https://www.police.wa.gov.au/~/media/Files/Police/Traffic/Cameras/Camera-Locations/MediaLocations-02072018-to-08072018.pdf?la=en'''"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Year strings used to recognise a cell that holds a date heading rather than a street name\n",
"YEARS = tuple(str(year) for year in range(2000, 2021))\n",
"\n",
"\n",
"def fix_date(x):\n",
"    \"\"\"Drop the leading token (presumably the weekday) from a date heading.\n",
"\n",
"    If the text contains \", \" the segment following the first \", \" is returned;\n",
"    otherwise the first space-separated word is removed.\n",
"    \"\"\"\n",
"    if \", \" in x:\n",
"        return x.split(\", \")[1]\n",
"    return ' '.join(x.split(\" \")[1:])\n",
"\n",
"\n",
"# result maps date string -> {street: [suburb, ...]}\n",
"result = {}\n",
"\n",
"# dict.fromkeys drops repeated URLs while keeping their order, so no PDF is fetched twice\n",
"for pdf in dict.fromkeys(s.split(\"\\n\")):\n",
"    print(pdf)\n",
"    df = tabula.read_pdf(pdf, pages='all')\n",
"    print(df.columns)\n",
"    # the first column header is used as the first date heading of the PDF\n",
"    date = fix_date(df.columns[0])\n",
"\n",
"    # setdefault keeps entries already collected for this date instead of wiping them\n",
"    result.setdefault(date, {})\n",
"    df.columns = ['st1', 'su1', 'st2', 'su2']\n",
"    for index, row in df.iterrows():\n",
"        first_cell = row['st1']\n",
"        if \"Street Name\" == first_cell:\n",
"            continue\n",
"        # NaN cells are floats and would make the `in` test raise; only strings can be date headings\n",
"        if isinstance(first_cell, str) and any(year in first_cell for year in YEARS):\n",
"            date = fix_date(first_cell)\n",
"            result.setdefault(date, {})\n",
"            continue\n",
"        pair1, pair2 = (row['st1'], row['su1']), (row['st2'], row['su2'])\n",
"        # each row carries two (street, suburb) pairs; both are recorded the same way\n",
"        for street, suburb in (pair1, pair2):\n",
"            if pd.isna(street) or pd.isna(suburb):\n",
"                continue\n",
"            suburbs = result[date].setdefault(street, [])\n",
"            if suburb not in suburbs:\n",
"                suburbs.append(suburb)\n",
"        print(date, pair1, pair2)\n",
"%store result\n",
"\n",
"with open(\"out.json\", \"w\") as f:\n",
"    json.dump(result, f)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%store -r\n",
"import numpy as np\n",
"import json\n",
"print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Convert each date-string key of `result` to a `datetime` object so that the entries can be sorted chronologically."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"\n",
"dt = datetime.datetime\n",
"\n",
"# Build a new dict rather than popping/inserting keys while iterating over `result`:\n",
"# mutating a dict during iteration can skip keys or revisit the freshly inserted\n",
"# datetime keys, which strptime would then reject.\n",
"converted = {}\n",
"for key, streets in result.items():\n",
"    # keys that are already datetimes are kept as they are, so the cell can be re-run safely\n",
"    newkey = key if isinstance(key, dt) else dt.strptime(key, \"%d %B %Y\")\n",
"    print(newkey)\n",
"    converted[newkey] = streets\n",
"result = converted"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%store result\n",
"print(len(result))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pre-processing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Every distinct \"street|suburb\" label seen on any day, in sorted order.\n",
"# A set gives constant-time de-duplication instead of rescanning a list for each entry.\n",
"unique_streets_suburbs = sorted({\n",
"    street + \"|\" + suburb\n",
"    for day in result\n",
"    for street in result[day]\n",
"    for suburb in result[day][street]\n",
"})\n",
"print(len(unique_streets_suburbs))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sort by date to determine earliest/latest date"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# (date, streets) pairs ordered by date; dict keys are unique, so ordering by the key alone\n",
"# gives the same list as comparing whole tuples\n",
"date_street_pairs = result.items()\n",
"by_date = sorted(date_street_pairs, key=lambda pair: pair[0])\n",
"print(by_date)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"earliest_date, latest_date = datetime.datetime(2018, 1, 29, 0, 0), datetime.datetime(2019, 7, 14, 0, 0)\n",
"delta = latest_date - earliest_date\n",
"print(delta.days)\n",
"\n",
"# +1 makes the range inclusive of latest_date; delta.days rows alone stop one day short of it\n",
"n_days = delta.days + 1\n",
"one_day = datetime.timedelta(days=1)\n",
"\n",
"# label -> column index, built once instead of calling list.index() for every entry\n",
"column_of = {label: col for col, label in enumerate(unique_streets_suburbs)}\n",
"\n",
"# answers[i][j] == 1 when label j is listed on day i; x_values[i] holds that day's features\n",
"answers = np.zeros((n_days, len(unique_streets_suburbs)))\n",
"x_values = np.zeros((n_days, 2))\n",
"\n",
"for i in range(n_days):\n",
"    current_date = earliest_date + i * one_day\n",
"    if current_date not in result:\n",
"        # no entry for this day: flag the features with a sentinel and leave the labels at zero\n",
"        x_values[i] = (-1, -1)\n",
"        continue\n",
"    # [epoch delta, is_weekend]; i is the number of days since earliest_date\n",
"    x_values[i] = (i, 1 if current_date.weekday() >= 5 else 0)\n",
"    streets = result[current_date]\n",
"    for street in streets:\n",
"        for suburb in streets[street]:\n",
"            answers[i][column_of[street + \"|\" + suburb]] = 1\n",
"print(len(answers[0]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## TensorFlow"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from __future__ import print_function\n",
"import keras\n",
"from keras.datasets import mnist\n",
"import tensorflow as tf\n",
"from keras.models import Sequential, Model\n",
"from keras.layers import Dense, Dropout, Flatten\n",
"from keras.layers import Conv2D, MaxPooling2D\n",
"import tensorflow as tf\n",
"from keras.backend.tensorflow_backend import set_session\n",
"from keras import backend as K\n",
"\n",
"config = tf.ConfigProto()\n",
"config.gpu_options.allow_growth = True\n",
"sess = tf.Session(config=config)\n",
"set_session(sess)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"batch_size = 14\n",
"epochs = 12\n",
"out_class = len(answers[0])\n",
"\n",
"# the final 100 rows are held out for validation; everything before them is used for training\n",
"(x_train, y_train), (x_test, y_test) = (x_values[:-100], answers[:-100]), (x_values[-100:], answers[-100:])\n",
"print(x_test.shape)\n",
"\n",
"model = Sequential()\n",
"# input_shape belongs on the first layer only; it was previously passed to the Dense layers\n",
"# that follow this Dropout, where it does not describe the model input.\n",
"# NOTE(review): dropout applied directly to the two input features - confirm this is intended.\n",
"model.add(Dropout(0.1, input_shape=(2,)))\n",
"model.add(Dense(64, activation='relu'))\n",
"model.add(Dropout(0.3))\n",
"model.add(Dense(128, activation='relu'))\n",
"model.add(Dropout(0.5))\n",
"# one sigmoid output per \"street|suburb\" label (multi-label prediction)\n",
"model.add(Dense(out_class, activation='sigmoid'))\n",
"\n",
"model.compile(loss=keras.losses.binary_crossentropy,\n",
"              optimizer=keras.optimizers.RMSprop(),\n",
"              metrics=['accuracy'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"history = model.fit(x_train, y_train,\n",
" batch_size=batch_size,\n",
" epochs=12,\n",
" verbose=1,\n",
" validation_data=(x_test, y_test))\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"plt.clf()\n",
"history_dict = history.history\n",
"loss_values, val_loss = history_dict['loss'], history_dict['val_loss']\n",
"# Separate name so the `epochs` training hyper-parameter is not overwritten by a range;\n",
"# the length comes from the loss series itself rather than from the accuracy key\n",
"epoch_range = range(1, len(loss_values) + 1)\n",
"\n",
"plt.plot(epoch_range, loss_values, 'bo', label=\"Training loss\")\n",
"plt.plot(epoch_range, val_loss, 'b', label=\"Validation loss\")\n",
"plt.title(\"Training and validation loss\")\n",
"plt.xlabel('Epochs')\n",
"plt.ylabel('Loss')\n",
"# without a legend the label= arguments above are never displayed\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.clf()\n",
"history_dict = history.history\n",
"# Keras releases differ on the metric key: 'acc' in older versions, 'accuracy' in newer ones\n",
"acc_key = 'acc' if 'acc' in history_dict else 'accuracy'\n",
"acc_values, val_acc = history_dict[acc_key], history_dict['val_' + acc_key]\n",
"# separate name so the `epochs` training hyper-parameter is not overwritten by a range\n",
"epoch_range = range(1, len(acc_values) + 1)\n",
"\n",
"plt.plot(epoch_range, acc_values, 'bo', label=\"Training acc\")\n",
"plt.plot(epoch_range, val_acc, 'b', label=\"Validation acc\")\n",
"plt.title(\"Training and validation acc\")\n",
"plt.xlabel('Epochs')\n",
"# this figure shows accuracy, not loss\n",
"plt.ylabel('Accuracy')\n",
"# without a legend the label= arguments above are never displayed\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Predicting"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(x_test[-1])\n",
"\n",
"# One offset drives both the model input and the reported date. They were previously set\n",
"# independently (input day 530 with weekend flag 0, date printed for day 532), so the\n",
"# printed date did not match the features that were actually predicted.\n",
"# NOTE(review): 532 is taken from the printed date - confirm this is the intended day.\n",
"days_since_start = 532\n",
"new_dt = datetime.datetime(2018, 1, 29, 0, 0) + datetime.timedelta(days=days_since_start)\n",
"\n",
"# same feature layout as the training rows: [days since the first date, is_weekend]\n",
"test = np.zeros((1, 2))\n",
"test[0][0] = days_since_start\n",
"test[0][1] = 1 if new_dt.weekday() >= 5 else 0\n",
"print(test.shape)\n",
"pred = model.predict(test, batch_size=14, verbose=1)\n",
"print(pred.shape)\n",
"\n",
"# label with the highest predicted score (np.argmax returns the first maximum)\n",
"index = int(np.argmax(pred[0]))\n",
"largest = pred[0][index]\n",
"\n",
"print(new_dt)\n",
"print(unique_streets_suburbs[index] + \": \" + str(largest))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment