Last active
October 25, 2018 21:59
-
-
Save sven4all/56bf566e3df3837098962accc2e7dc2a to your computer and use it in GitHub Desktop.
This scripts was written for a Master Thesis research at the TU Delft to collect data about the usage characteristics of Mobike and derive trips of that data, for a movie that was made with help of the data collection performed with this script see https://www.youtube.com/watch?v=MVqJtJA6_wg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Database structure that was used to store the data | |
-- Postgresql 10.5 + PostGis 2.4.4 | |
CREATE TABLE cycle_measurement ( | |
sample_id INT, | |
bike_id VARCHAR(255), | |
location GEOGRAPHY, | |
bike_type VARCHAR(10) | |
); | |
CREATE TABLE sample ( | |
sample_id SERIAL, | |
start_time timestamp, | |
end_time timestamp, | |
number_of_bicycles INT | |
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import psycopg2 | |
import csv | |
conn = psycopg2.connect("dbname=mobike") | |
cur = conn.cursor() | |
# Query all bikes that appeared at least once in the data. | |
stmt1= """SELECT DISTINCT(bike_id) FROM cycle_measurement""" | |
cur.execute(stmt1) | |
# Bike that has made the most trips | |
max_number_of_trips = 0 | |
bike_id_most_trips = '' | |
# Indicates number of trips within time period. | |
active_bike_counter = 0 | |
# Array that contains all the trips that are found with the query | |
all_trips = [] | |
index_bike = 0 | |
colnames = None | |
bike_ids = cur.fetchall() | |
# Execute for every bike_id | |
for idx, bike_id in enumerate(bike_ids): | |
# Returns all the trips bike made within time frame. | |
stmt2 = """ | |
SELECT bike_id, bike_type, start_x, start_y, end_x, end_y, s2.start_time + '2 HOURS' as trip_start_time, s1.start_time + '2 HOURS' as trip_end_time | |
FROM | |
(SELECT bike_id, | |
bike_type, | |
ST_DISTANCE(LOCATION, prev_geom) AS distance, | |
ST_X(LOCATION::geometry) AS end_x, | |
ST_Y(LOCATION::geometry) AS end_y, | |
ST_X(prev_geom::geometry) AS start_x, | |
ST_Y(prev_geom::geometry) AS start_y, | |
sample_id, | |
prev_sample_id, | |
sample_id - prev_sample_id as sample_difference | |
FROM | |
(SELECT bike_id, | |
bike_type, | |
LOCATION, | |
sample_id, | |
lag(LOCATION) OVER ( | |
ORDER BY sample_id) AS prev_geom, | |
lag(sample_id) OVER ( | |
ORDER BY sample_id) AS prev_sample_id | |
FROM cycle_measurement | |
WHERE bike_id = '%s') AS q1) AS q2 | |
JOIN sample s1 | |
ON q2.sample_id = s1.sample_id | |
JOIN sample s2 | |
ON prev_sample_id = s2.sample_id | |
WHERE distance > 200 and (s2.start_time + '2 HOURS') >= '2018-10-23' and (s2.end_time + '2 HOURS') < '2018-10-24'; | |
""" % bike_id | |
cur.execute(stmt2) | |
trips = cur.fetchall() | |
if not colnames: | |
colnames = [desc[0] for desc in cur.description] | |
if len(trips) > 0: | |
active_bike_counter += 1 | |
if len(trips) > max_number_of_trips: | |
max_number_of_trips= len(trips) | |
bike_id_most_trips = bike_id | |
# append all trips at end of list | |
all_trips.extend(trips) | |
print("Progress {} / {} ".format(idx , len(bike_ids)), end="\r") | |
csvfile = "trips.csv" | |
with open(csvfile, "w") as output: | |
writer = csv.writer(output, lineterminator='\n') | |
writer.writerow(colnames) | |
writer.writerows(all_trips) | |
print("Most trips %s, with bike_id %s." % (max_number_of_trips, bike_id_most_trips)) | |
print("Total number of trips: %s" % len(all_trips)) | |
print("Number of active bikes: %s" % active_bike_counter) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import psycopg2 | |
import logging | |
import time | |
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO) | |
class MobikeLocation(): | |
def __init__(self, bike_id, lat, lng, bike_type): | |
self.bike_id = bike_id | |
self.lat = lat | |
self.lng = lng | |
self.bike_type = bike_type | |
def save(self, cur, measurement_id): | |
cur.execute(""" | |
INSERT INTO cycle_measurement | |
(sample_id, bike_id, location, bike_type) VALUES (%s, %s, ST_SetSRID(ST_MakePoint(%s, %s),4326), %s) | |
""", (measurement_id, self.bike_id, self.lng, self.lat, self.bike_type)) | |
class MobikeSniffer(): | |
def __init__(self, start_lat, start_lng, end_lat, end_lng): | |
self.start_lat = start_lat | |
self.start_lng = start_lng | |
self.end_lat = end_lat | |
self.end_lng = end_lng | |
# This is hardcoded for the Delft situation, adjust for your own situation. | |
self.diff_lat = 0.005 | |
self.diff_lng = 0.004 | |
def snif_bikes(self): | |
bikes = {} | |
lat = self.start_lat | |
while lat > self.end_lat: | |
lat = lat - self.diff_lat | |
lng = self.start_lng | |
while lng < self.end_lng: | |
lng = lng + self.diff_lng | |
tmp_bikes = self.sample(lat, lng, 0) | |
for cycle in tmp_bikes: | |
bikes[cycle["distId"]] = MobikeLocation(cycle["distId"], cycle["distY"], cycle["distX"], cycle["biketype"]) | |
return bikes | |
def sample(self, lat, lng, level): | |
if level > 1: | |
logging.info("Level %s, stop recursion", level) | |
return [] | |
url = "https://app.mobike.com/api/nearby/v4/nearbyBikeInfo?latitude=%s&longitude=%s&sign=132d4d239a9f6923190562d0a77c0b39&bikenum=50&scope=500" % (lat,lng) | |
r = requests.get(url) | |
if r.status_code != 200: | |
logging.warn(r.status_code) | |
logging.warn(r.text) | |
return [] | |
bikes = r.json()["bike"] | |
if len(bikes) >= 50: | |
bikes += self.sample(lat + 0.0014, lng + 0.0014, level + 1) | |
bikes += self.sample(lat + 0.0014, lng - 0.0014, level + 1) | |
bikes += self.sample(lat - 0.0014, lng + 0.0014, level + 1) | |
bikes += self.sample(lat - 0.0014, lng - 0.0014, level + 1) | |
return bikes | |
class MobikeImporter(): | |
def __init__(self): | |
# The 'box' for where data was collected. | |
self.sniffer = MobikeSniffer(52.030233, 4.309318, 51.983844, 4.411370) | |
self.conn = psycopg2.connect("dbname=mobike") | |
def import_bikes(self): | |
cur = self.conn.cursor() | |
cur.execute("INSERT INTO sample (start_time) VALUES (NOW()) returning sample_id") | |
measurement_id = cur.fetchone()[0] | |
self.conn.commit() | |
bikes = self.sniffer.snif_bikes() | |
for _, bike in bikes.items(): | |
bike.save(cur, measurement_id) | |
cur.execute("""UPDATE sample | |
SET end_time = NOW(), | |
number_of_bicycles = %s | |
WHERE sample_id = %s""", (len(bikes), measurement_id,)) | |
self.conn.commit() | |
return bikes | |
importer = MobikeImporter() | |
while True: | |
start = time.time() | |
logging.info("Start import.") | |
number_of_bikes = len(importer.import_bikes()) | |
logging.info("Completed import in %.2f, imported %s bikes." % ((time.time() - start), number_of_bikes)) | |
time.sleep(60 * 5 - (time.time() - start) ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment