sven4all · October 25, 2018 21:59
diff --git a/database_structure.sql b/database_structure.sql
 -- Database structure that was used to store the data
 -- Postgresql 10.5 + PostGis 2.4.4

 CREATE TABLE cycle_measurement (
    sample_id INT,
    bike_id VARCHAR(255),
    location GEOGRAPHY,
    bike_type VARCHAR(10)
 );

 CREATE TABLE sample (
    sample_id SERIAL,
    start_time timestamp,
    end_time timestamp,
    number_of_bicycles INT
 );
diff --git a/extract_trips.py b/extract_trips.py
 import psycopg2
 import csv

 conn = psycopg2.connect("dbname=mobike")
 cur = conn.cursor()

 # Query all bikes that appeared at least once in the data.
 stmt1= """SELECT DISTINCT(bike_id) FROM cycle_measurement"""
 cur.execute(stmt1)

 # Bike that has made the most trips
 max_number_of_trips = 0
 bike_id_most_trips = ''
 # Indicates number of trips within time period.
 active_bike_counter = 0
 # Array that contains all the trips that are found with the query
 all_trips = []

 index_bike = 0

 colnames = None
 bike_ids = cur.fetchall()
 # Execute for every bike_id
 for idx, bike_id in enumerate(bike_ids):
    # Returns all the trips bike made within time frame.
    stmt2 = """
 SELECT bike_id, bike_type, start_x, start_y, end_x, end_y, s2.start_time + '2 HOURS' as trip_start_time, s1.start_time + '2 HOURS' as trip_end_time
 FROM
  (SELECT bike_id,
          bike_type,
          ST_DISTANCE(LOCATION, prev_geom) AS distance,
          ST_X(LOCATION::geometry) AS end_x,
          ST_Y(LOCATION::geometry) AS end_y,
          ST_X(prev_geom::geometry) AS start_x,
          ST_Y(prev_geom::geometry) AS start_y,
          sample_id,
          prev_sample_id,
          sample_id - prev_sample_id as sample_difference
   FROM
     (SELECT bike_id,
             bike_type,
             LOCATION,
             sample_id,
             lag(LOCATION) OVER (
                                 ORDER BY sample_id) AS prev_geom,
             lag(sample_id) OVER (
                                 ORDER BY sample_id) AS prev_sample_id
      FROM cycle_measurement
      WHERE bike_id = '%s') AS q1) AS q2
 JOIN sample s1
 ON q2.sample_id = s1.sample_id
 JOIN sample s2
 ON prev_sample_id = s2.sample_id
 WHERE distance > 200 and (s2.start_time + '2 HOURS') >= '2018-10-23' and (s2.end_time + '2 HOURS') < '2018-10-24';
 """ % bike_id
    cur.execute(stmt2)
    trips = cur.fetchall()

    if not colnames:
        colnames = [desc[0] for desc in cur.description]

    if len(trips) > 0:
        active_bike_counter += 1

    if len(trips) > max_number_of_trips:
        max_number_of_trips= len(trips)
        bike_id_most_trips = bike_id

    # append all trips at end of list
    all_trips.extend(trips)
    print("Progress {} / {} ".format(idx , len(bike_ids)), end="\r")

 csvfile = "trips.csv"
 with open(csvfile, "w") as output:
    writer = csv.writer(output, lineterminator='\n')
    writer.writerow(colnames)
    writer.writerows(all_trips)   

 print("Most trips %s, with bike_id %s." % (max_number_of_trips, bike_id_most_trips))
 print("Total number of trips: %s" % len(all_trips))
 print("Number of active bikes: %s" % active_bike_counter)
diff --git a/import_mobike.py b/import_mobike.py
 import requests
 import psycopg2
 import logging
 import time

 logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

 class MobikeLocation():
    def __init__(self, bike_id, lat, lng, bike_type):
        self.bike_id = bike_id
        self.lat = lat
        self.lng = lng
        self.bike_type = bike_type
    
    def save(self, cur, measurement_id):
        cur.execute("""
        INSERT INTO  cycle_measurement
        (sample_id, bike_id, location, bike_type) VALUES (%s, %s,  ST_SetSRID(ST_MakePoint(%s, %s),4326), %s)
        """, (measurement_id, self.bike_id, self.lng, self.lat, self.bike_type))

 class MobikeSniffer():
    def __init__(self, start_lat, start_lng, end_lat, end_lng):
        self.start_lat = start_lat
        self.start_lng = start_lng
        self.end_lat = end_lat
        self.end_lng = end_lng
        # This is hardcoded for the Delft situation, adjust for your own situation.
        self.diff_lat = 0.005
        self.diff_lng = 0.004

    def snif_bikes(self):
        bikes = {}
        lat = self.start_lat

        while lat > self.end_lat:
            lat = lat - self.diff_lat
            lng = self.start_lng
            while lng < self.end_lng:
                lng = lng + self.diff_lng
                tmp_bikes = self.sample(lat, lng, 0)
    
                for cycle in tmp_bikes:
                    bikes[cycle["distId"]] = MobikeLocation(cycle["distId"], cycle["distY"], cycle["distX"], cycle["biketype"])
        return bikes

    def sample(self, lat, lng, level):
        if level > 1:
            logging.info("Level %s, stop recursion", level)
            return []
        url = "https://app.mobike.com/api/nearby/v4/nearbyBikeInfo?latitude=%s&longitude=%s&sign=132d4d239a9f6923190562d0a77c0b39&bikenum=50&scope=500" % (lat,lng)
        r = requests.get(url)
        if r.status_code != 200:
            logging.warn(r.status_code)
            logging.warn(r.text)
            return []
        bikes = r.json()["bike"]
        if len(bikes) >= 50:
            bikes += self.sample(lat + 0.0014, lng + 0.0014, level + 1)
            bikes += self.sample(lat + 0.0014, lng - 0.0014, level + 1)
            bikes += self.sample(lat - 0.0014, lng + 0.0014, level + 1)
            bikes += self.sample(lat - 0.0014, lng - 0.0014, level + 1)
        return bikes



 class MobikeImporter():
    def __init__(self):
        # The 'box' for where data was collected. 
        self.sniffer = MobikeSniffer(52.030233, 4.309318, 51.983844, 4.411370)
        self.conn = psycopg2.connect("dbname=mobike")

    def import_bikes(self):
        cur = self.conn.cursor()
        cur.execute("INSERT INTO sample (start_time) VALUES (NOW()) returning sample_id")
        measurement_id = cur.fetchone()[0]
        self.conn.commit()

        bikes = self.sniffer.snif_bikes()
        for _, bike in bikes.items():
            bike.save(cur, measurement_id)

        cur.execute("""UPDATE sample 
        SET end_time = NOW(),
        number_of_bicycles = %s
        WHERE sample_id = %s""", (len(bikes), measurement_id,))

        self.conn.commit()
        return bikes

 importer = MobikeImporter()

 while True:
    start = time.time()
    logging.info("Start import.")
    number_of_bikes = len(importer.import_bikes())
    logging.info("Completed import in %.2f, imported %s bikes." % ((time.time() - start), number_of_bikes))
    time.sleep(60 * 5 - (time.time() - start) )
	-- Database structure that was used to store the data
	-- Postgresql 10.5 + PostGis 2.4.4

	CREATE TABLE cycle_measurement (
	sample_id INT,
	bike_id VARCHAR(255),
	location GEOGRAPHY,
	bike_type VARCHAR(10)
	);

	CREATE TABLE sample (
	sample_id SERIAL,
	start_time timestamp,
	end_time timestamp,
	number_of_bicycles INT
	);
	import psycopg2
	import csv

	conn = psycopg2.connect("dbname=mobike")
	cur = conn.cursor()

	# Query all bikes that appeared at least once in the data.
	stmt1= """SELECT DISTINCT(bike_id) FROM cycle_measurement"""
	cur.execute(stmt1)

	# Bike that has made the most trips
	max_number_of_trips = 0
	bike_id_most_trips = ''
	# Indicates number of trips within time period.
	active_bike_counter = 0
	# Array that contains all the trips that are found with the query
	all_trips = []

	index_bike = 0

	colnames = None
	bike_ids = cur.fetchall()
	# Execute for every bike_id
	for idx, bike_id in enumerate(bike_ids):
	# Returns all the trips bike made within time frame.
	stmt2 = """
	SELECT bike_id, bike_type, start_x, start_y, end_x, end_y, s2.start_time + '2 HOURS' as trip_start_time, s1.start_time + '2 HOURS' as trip_end_time
	FROM
	(SELECT bike_id,
	bike_type,
	ST_DISTANCE(LOCATION, prev_geom) AS distance,
	ST_X(LOCATION::geometry) AS end_x,
	ST_Y(LOCATION::geometry) AS end_y,
	ST_X(prev_geom::geometry) AS start_x,
	ST_Y(prev_geom::geometry) AS start_y,
	sample_id,
	prev_sample_id,
	sample_id - prev_sample_id as sample_difference
	FROM
	(SELECT bike_id,
	bike_type,
	LOCATION,
	sample_id,
	lag(LOCATION) OVER (
	ORDER BY sample_id) AS prev_geom,
	lag(sample_id) OVER (
	ORDER BY sample_id) AS prev_sample_id
	FROM cycle_measurement
	WHERE bike_id = '%s') AS q1) AS q2
	JOIN sample s1
	ON q2.sample_id = s1.sample_id
	JOIN sample s2
	ON prev_sample_id = s2.sample_id
	WHERE distance > 200 and (s2.start_time + '2 HOURS') >= '2018-10-23' and (s2.end_time + '2 HOURS') < '2018-10-24';
	""" % bike_id
	cur.execute(stmt2)
	trips = cur.fetchall()

	if not colnames:
	colnames = [desc[0] for desc in cur.description]

	if len(trips) > 0:
	active_bike_counter += 1

	if len(trips) > max_number_of_trips:
	max_number_of_trips= len(trips)
	bike_id_most_trips = bike_id

	# append all trips at end of list
	all_trips.extend(trips)
	print("Progress {} / {} ".format(idx , len(bike_ids)), end="\r")

	csvfile = "trips.csv"
	with open(csvfile, "w") as output:
	writer = csv.writer(output, lineterminator='\n')
	writer.writerow(colnames)
	writer.writerows(all_trips)

	print("Most trips %s, with bike_id %s." % (max_number_of_trips, bike_id_most_trips))
	print("Total number of trips: %s" % len(all_trips))
	print("Number of active bikes: %s" % active_bike_counter)
	import requests
	import psycopg2
	import logging
	import time

	logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

	class MobikeLocation():
	def __init__(self, bike_id, lat, lng, bike_type):
	self.bike_id = bike_id
	self.lat = lat
	self.lng = lng
	self.bike_type = bike_type

	def save(self, cur, measurement_id):
	cur.execute("""
	INSERT INTO cycle_measurement
	(sample_id, bike_id, location, bike_type) VALUES (%s, %s, ST_SetSRID(ST_MakePoint(%s, %s),4326), %s)
	""", (measurement_id, self.bike_id, self.lng, self.lat, self.bike_type))

	class MobikeSniffer():
	def __init__(self, start_lat, start_lng, end_lat, end_lng):
	self.start_lat = start_lat
	self.start_lng = start_lng
	self.end_lat = end_lat
	self.end_lng = end_lng
	# This is hardcoded for the Delft situation, adjust for your own situation.
	self.diff_lat = 0.005
	self.diff_lng = 0.004

	def snif_bikes(self):
	bikes = {}
	lat = self.start_lat

	while lat > self.end_lat:
	lat = lat - self.diff_lat
	lng = self.start_lng
	while lng < self.end_lng:
	lng = lng + self.diff_lng
	tmp_bikes = self.sample(lat, lng, 0)

	for cycle in tmp_bikes:
	bikes[cycle["distId"]] = MobikeLocation(cycle["distId"], cycle["distY"], cycle["distX"], cycle["biketype"])
	return bikes

	def sample(self, lat, lng, level):
	if level > 1:
	logging.info("Level %s, stop recursion", level)
	return []
	url = "https://app.mobike.com/api/nearby/v4/nearbyBikeInfo?latitude=%s&longitude=%s&sign=132d4d239a9f6923190562d0a77c0b39&bikenum=50&scope=500" % (lat,lng)
	r = requests.get(url)
	if r.status_code != 200:
	logging.warn(r.status_code)
	logging.warn(r.text)
	return []
	bikes = r.json()["bike"]
	if len(bikes) >= 50:
	bikes += self.sample(lat + 0.0014, lng + 0.0014, level + 1)
	bikes += self.sample(lat + 0.0014, lng - 0.0014, level + 1)
	bikes += self.sample(lat - 0.0014, lng + 0.0014, level + 1)
	bikes += self.sample(lat - 0.0014, lng - 0.0014, level + 1)
	return bikes



	class MobikeImporter():
	def __init__(self):
	# The 'box' for where data was collected.
	self.sniffer = MobikeSniffer(52.030233, 4.309318, 51.983844, 4.411370)
	self.conn = psycopg2.connect("dbname=mobike")

	def import_bikes(self):
	cur = self.conn.cursor()
	cur.execute("INSERT INTO sample (start_time) VALUES (NOW()) returning sample_id")
	measurement_id = cur.fetchone()[0]
	self.conn.commit()

	bikes = self.sniffer.snif_bikes()
	for _, bike in bikes.items():
	bike.save(cur, measurement_id)

	cur.execute("""UPDATE sample
	SET end_time = NOW(),
	number_of_bicycles = %s
	WHERE sample_id = %s""", (len(bikes), measurement_id,))

	self.conn.commit()
	return bikes

	importer = MobikeImporter()

	while True:
	start = time.time()
	logging.info("Start import.")
	number_of_bikes = len(importer.import_bikes())
	logging.info("Completed import in %.2f, imported %s bikes." % ((time.time() - start), number_of_bikes))
	time.sleep(60 * 5 - (time.time() - start) )