Skip to content

Instantly share code, notes, and snippets.

@samlexrod
Last active December 10, 2017 08:39
Show Gist options
  • Save samlexrod/ec4a76315f36ea3cb437e7402f84aa08 to your computer and use it in GitHub Desktop.
Save samlexrod/ec4a76315f36ea3cb437e7402f84aa08 to your computer and use it in GitHub Desktop.
Inspired to create a ride-share data model and queries

Rideshare Scenario - PostgreSQL

After doing a case study, I got inspired to continue building up the data model and create more queries to investigate the made-up dataset.

You will find some modified questions asked and other customized questions I made to help on making the queries.

Query Examples

  • Show bill transactions with no payments
  SELECT * FROM bill_details bd
  WHERE bd.id NOT IN (SELECT DISTINCT bill_id FROM payment_details);

  100002,'jose1','2016-06-07 22:00:21.996182',251,'1'
  100007,'thomas89','2016-06-07 22:00:21.996182',615,'1'
  100009,'john3','2016-06-07 22:00:21.996182',700,'1'
  100011,'jose1','2017-06-07 22:00:21.996182',251,'1'
  100016,'thomas89','2017-06-07 22:00:21.996182',615,'1'
  100018,'john3','2017-06-07 22:00:21.996182',700,'0'
  100019,'john3','2017-06-07 22:00:21.996182',350,'0'
  100020,'john3','2017-06-07 22:00:21.996182',700,'0'
  100021,'stev33','2016-06-07 22:00:21.996182',1001,'0'
  100022,'stev33','2016-06-07 22:00:21.996182',1201,'0' 
  • Show bill transactions fro bills made in June 2016 for less than 500
  SELECT * FROM bill_details
  WHERE EXTRACT(YEAR FROM created_at) = 2016 
    AND EXTRACT(MONTH FROM created_at) = 06 AND amount < 500;

  100001,'jose1','2016-06-07 22:00:21.996182',251,'0'
  100002,'jose1','2016-06-07 22:00:21.996182',251,'1'
  100005,'samuel33','2016-06-07 22:00:21.996182',251,'0'
  100006,'raul889','2016-06-07 22:00:21.996182',301,'0'    
  • Show bill and payment details including days w/o payments and
  SELECT
    bd.id, bd.partner_id, bd.amount AS bill_amount, bd.created_at::DATE AS bill_date, bd.error_code AS bill_error,
      py.amount AS payment_amount, py.created_at::DATE AS payment_date, (py.created_at - bd.created_at) AS payment_lag,
      CASE WHEN py.amount IS NULL THEN 'not paid' ELSE 'paid' END AS Status 
  FROM bill_details bd LEFT JOIN payment_details py ON bd.id = py.bill_id;

  100001,'jose1',251,'2016-06-07','0','300.10','2016-06-14','7 days','paid'
  100003,'jane22',451,'2016-12-07','0','410.10','2017-06-14','189 days','paid'
  100004,'stev33',651,'2016-06-07','0','500.10','2016-06-14','7 days','paid'
  100005,'samuel33',251,'2016-06-07','0','255.6','2016-06-14','7 days','paid'
  100006,'raul889',301,'2016-06-07','0','220.5','2016-12-14','190 days','paid'
  100010,'jose1',251,'2017-06-07','0','350.10','2017-06-14','7 days','paid'
  100012,'jane22',451,'2017-12-07','0','410.10','2017-06-14','-176 days','paid'
  100013,'stev33',651,'2017-06-07','0','500.10','2017-06-14','7 days','paid'
  100014,'samuel33',251,'2017-06-07','0','255.6','2017-06-14','7 days','paid'
  100015,'raul889',301,'2017-06-07','0','700.25','2017-12-14','190 days','paid'
  100017,'john3',700,'2017-06-07','0','700.00','2017-12-07','183 days','paid'
  100002,'jose1',251,'2016-06-07','1',,,,'not paid'
  100022,'stev33',1201,'2016-06-07','0',,,,'not paid'
  100018,'john3',700,'2017-06-07','0',,,,'not paid'
  100021,'stev33',1001,'2016-06-07','0',,,,'not paid'
  100020,'john3',700,'2017-06-07','0',,,,'not paid'
  100007,'thomas89',615,'2016-06-07','1',,,,'not paid'
  100016,'thomas89',615,'2017-06-07','1',,,,'not paid'
  100019,'john3',350,'2017-06-07','0',,,,'not paid'
  100011,'jose1',251,'2017-06-07','1',,,,'not paid'
  100009,'john3',700,'2016-06-07','1',,,,'not paid'
  • Create temporary table with important bill and payment details
  CREATE TEMP TABLE bill_payment AS 
  SELECT
    bd.id, bd.partner_id, bd.amount AS bill_amount, bd.created_at::date AS bill_date, bd.error_code AS bill_error,
      py.amount AS payment_amount, py.created_at::date AS payment_date, (py.created_at::date - bd.created_at::date) AS payment_lag,
      CASE WHEN py.amount IS NULL THEN (current_date - bd.created_at::date) END AS no_pay_since,
      (bd.amount - CASE WHEN py.amount IS NULL THEN 0 ELSE py.amount END) AS balance,
      CASE WHEN py.amount IS NULL THEN 'not paid' ELSE 'paid' END AS status 
  FROM bill_details bd LEFT JOIN payment_details py ON bd.id = py.bill_id; --DROP TABLE bill_payment
  
  SELECT 21
  Query returned successfully in 103 msec.  
  • Using temporary table, show paid vs no paid details and group the details
  SELECT status, bill_error, SUM(bill_amount) AS billed_total, SUM(payment_amount) AS payment_total, SUM(balance) AS balance_total
  FROM bill_payment
  GROUP BY status, bill_error
  ORDER BY SUM(payment_amount);
  
  'paid','0','4510','4602.55','-92.55'
  'not paid','0','3952',,'3952'
  'not paid','1','2432',,'2432'  
  • Using temporary table, provide statistics grouped by banks
  SELECT 
    pb.id, pb.bank_name, SUM(bill_amount) AS billed_total, SUM(payment_amount) AS paid_total, 
      COUNT(bill_amount) AS billed_count, COUNT(payment_amount) AS paid_count, SUM(balance) AS balance,
      ROUND(AVG(payment_lag), 2) AS payment_lag_avg, ROUND(stddev_pop(payment_lag), 2) pay_lag_std, 
      MAX(payment_lag) AS pay_lag_max, MIN(payment_lag) AS pay_lag_min,
      ROUND(AVG(no_pay_since), 2) AS no_pay_since_avg, ROUND(stddev_pop(no_pay_since), 2) no_pay_since_std, 
      MAX(no_pay_since) AS no_pay_since_max, MIN(no_pay_since) AS no_pay_since_min
  FROM bill_payment bp
  LEFT JOIN driver_details dd ON bp.partner_id = dd.partner_id
  LEFT JOIN partner_bankinfo pb ON dd.bank_id = pb.id
  GROUP BY pb.id, pb.bank_name;
  
  'bancomer-mexico','Bancomer','7884','1700.20','11','3','6183.80','65.67','82.97',183,7,'367.50','182.50',550,185
  'citi-usa','Citi Bank','902','820.20','2','2','81.80','6.50','182.50',189,-176,,,,
  'bri-indonesia','Bank Rakyat','2108','2082.15','8','6','25.85','68.00','86.27',190,7,'367.50','182.50',550,185
  • Using a with clause, validate why no_pay_since_averages are the same for Bank Rakyat and Bancomer
  WITH no_pay_since_valid AS (
    SELECT bp.id, no_pay_since, bank_name
      FROM bill_payment bp
      LEFT JOIN driver_details dd ON bp.partner_id = dd.partner_id
      LEFT JOIN partner_bankinfo pb ON dd.bank_id = pb.id
      WHERE (pb.bank_name = 'Bank Rakyat' OR pb.bank_name = 'Bancomer') AND no_pay_since IS NOT NULL
  )
  SELECT 
      svout.bank_name, svout.no_pay_since, COUNT(svout.no_pay_since),
      (SELECT SUM(svin.no_pay_since) FROM no_pay_since_valid svin
       WHERE svout.bank_name = svin.bank_name
      ) AS sub_total,
      ROUND((SELECT SUM(svin.no_pay_since) FROM no_pay_since_valid svin
              WHERE svout.bank_name = svin.bank_name)/
            (SELECT COUNT(svin.no_pay_since)::DECIMAL FROM no_pay_since_valid svin
             WHERE svout.bank_name = svin.bank_name), 2
      ) AS sub_average
  FROM no_pay_since_valid svout
  GROUP BY bank_name, no_pay_since;
  
  'Bancomer',185,'4','2940','367.50'
  'Bancomer',550,'4','2940','367.50'
  'Bank Rakyat',550,'1','735','367.50'
  'Bank Rakyat',185,'1','735','367.50'
  • Find the transaction with transaction date errors
  SELECT * 
  FROM (
      SELECT 
        bd.id, bd.partner_id, bd.amount AS bill_amount, bd.created_at AS bill_date, bd.error_code AS bill_error,
        py.amount AS payment_amount, py.created_at AS payment_date, (py.created_at - bd.created_at) AS payment_lag
      FROM bill_details bd LEFT JOIN payment_details py ON bd.id = py.bill_id
  ) foo 
  WHERE payment_lag < '0';  
  
  100012,'jane22',451,'2017-12-07 22:00:21.996182','0','410.10','2017-06-14 22:00:21.996182','-176 days'
  • Show how many partners received payments in banks of Jakarta
  SELECT 
    COUNT(DISTINCT dd.partner_id) AS number_partners
  FROM driver_details dd
  LEFT JOIN bill_details bd ON dd.partner_id = bd.partner_id
  LEFT JOIN payment_details pd ON bd.id = pd.bill_id
  LEFT JOIN partner_bankinfo pb ON dd.bank_id = pb.id
  LEFT JOIN city_details cd ON cd.id = pb.bank_city_id
  WHERE pd.error_code = '0' AND cd.city_name = 'Jakarta'
    AND EXTRACT(WEEK FROM dd.last_trip) < (EXTRACT(WEEK FROM NOW()) - 2);  
    
  '2'  
  • Top 2 average payments by partners
  SELECT 
      dd.partner_id, dd.first_name || ' ' || dd.last_name AS partner_name, ROUND(AVG(bd.amount), 2) AS avg_bill,
      COUNT(bd.amount) AS number_bills, MAX(bd.created_at)::date AS last_bill
  FROM driver_details dd
  LEFT JOIN bill_details bd ON dd.partner_id = bd.partner_id
  LEFT JOIN partner_bankinfo pb ON pb.id = dd.bank_id
  LEFT JOIN city_details cd ON cd.id = pb.bank_city_id
  WHERE cd.currency_code = 'MXN' AND EXTRACT(YEAR FROM bd.created_at) = 2016
  GROUP BY dd.partner_id
  HAVING AVG(amount) IS NOT NULL --nulls should be investigated
  ORDER BY AVG(amount) DESC
  LIMIT 2;
  
  'stev33','Steven Lamas','951.00','3','2016-06-07'
  'john3','John Travolta','700.00','1','2016-06-07'  

Creating Database and Tables - PostgreSQL

CREATE TABLE city_details( -- DROP TABLE city_details CASCADE
		id INTEGER PRIMARY KEY,
     city_name VARCHAR(20),
     country_id INTEGER UNIQUE,
   	country_name VARCHAR(20),
   	currency_code VARCHAR(20)
);

CREATE TABLE driver_details( -- DROP TABLE driver_details CASCADE
		partner_id VARCHAR(20) PRIMARY KEY,
   	first_name VARCHAR(20),
     last_name VARCHAR(20),
     last_trip TIMESTAMP,
     bank_account INTEGER
);
  
CREATE TABLE partner_bankinfo( -- DROP TABLE partner_bankinfo CASCADE
		id VARCHAR(20) PRIMARY KEY,
    bank_routing INTEGER,
     bank_city_id INTEGER REFERENCES city_details(id),
     bank_name VARCHAR(20)
);

--forgot to add the relation to the bank_id
ALTER TABLE driver_details
	ADD COLUMN bank_id VARCHAR(20) REFERENCES partner_bankinfo(id);

CREATE TABLE bill_details( --DROP TABLE bill_details CASCADE
		id INTEGER PRIMARY KEY,
     partner_id VARCHAR(20) REFERENCES driver_details(partner_id),
  	created_at TIMESTAMP,
   	amount INTEGER,
   	error_code VARCHAR(20)
);

CREATE TABLE payment_details( -- DROP TABLE payment_details
	id INTEGER PRIMARY KEY,
    bill_id INTEGER REFERENCES bill_details(id),
    partner_id VARCHAR(20) REFERENCES driver_details(partner_id),
    created_at TIMESTAMP,
    amount DECIMAL,
    error_code VARCHAR(20)
);

Inserting Made-up Dataset

INSERT INTO city_details
VALUES
(10, 'Jakarta', 1, 'Indonesia', 'IDR'),
(20, 'California', 2, 'United States', 'USD'),
(30, 'Cozumel', 3, 'Mexico', 'MXN');

INSERT INTO partner_bankinfo
VALUES
('bri-indonesia', 3054654, 10, 'Bank Rakyat' ),
('citi-usa', 3054653, 20, 'Citi Bank'),
('bancomer-mexico', 3054634, 30, 'Bancomer');

INSERT INTO driver_details
VALUES
('jose1', 'Jose', 'Ortiz', '2017-11-07 22:21:16.261347-08', 6514651, 'bri-indonesia'),
('jane22', 'Jane', 'Smith', now(), 6541654, 'citi-usa'),
('stev33', 'Steven', 'Lamas', now(), 654654, 'bancomer-mexico'),
('raul889', 'Raul', 'Martinez', '2017-11-07 22:21:16.261347-08', 5465468, 'bri-indonesia'),
('samuel33', 'Samuel', 'Rodriguez', '2017-11-07 22:21:16.261347-08', 654651, 'bri-indonesia'),
('thomas89', 'Thomas', 'Tom', now(), 981651, 'bancomer-mexico'),
('john3', 'John', 'Travolta', '2017-11-07 22:21:16.261347-08', 6516216, 'bancomer-mexico'),
('henryindy', 'Henry', 'Jones', '2017-11-07 22:21:16.261347-08', 6551556, 'citi-usa'),
('juan34', 'Juan', 'Velasquez', '2017-11-07 22:21:16.261347-08', 6546546, 'bri-indonesia'),
('arturo3', 'Arturo', 'Pena', '2017-11-07 22:21:16.261347-08', 651651, 'bri-indonesia'),
('tiemma3', 'Tiemma', 'Williams', now(), 6516516, 'bancomer-mexico');

INSERT INTO bill_details
VALUES
(100001, 'jose1', '2016-06-07 22:00:21.996182', 250.5, 0),
(100002, 'jose1', '2016-06-07 22:00:21.996182', 250.5, 1),
(100003, 'jane22', '2016-12-07 22:00:21.996182', 450.5, 0),
(100004, 'stev33', '2016-06-07 22:00:21.996182', 650.5, 0),
(100005, 'samuel33', '2016-06-07 22:00:21.996182', 250.5, 0),
(100006, 'raul889', '2016-06-07 22:00:21.996182', 301.20, 0),
(100007, 'thomas89', '2016-06-07 22:00:21.996182', 615.15, 1),
(100009, 'john3', '2016-06-07 22:00:21.996182', 700.00, 1),
(100010, 'jose1', '2017-06-07 22:00:21.996182', 250.5, 0),
(100011, 'jose1', '2017-06-07 22:00:21.996182', 250.5, 1),
(100012, 'jane22', '2017-12-07 22:00:21.996182', 450.5, 0),
(100013, 'stev33', '2017-06-07 22:00:21.996182', 650.5, 0),
(100014, 'samuel33', '2017-06-07 22:00:21.996182', 250.5, 0),
(100015, 'raul889', '2017-06-07 22:00:21.996182', 301.20, 0),
(100016, 'thomas89', '2017-06-07 22:00:21.996182', 615.15, 1),
(100017, 'john3', '2017-06-07 22:00:21.996182', 700.00, 0);

INSERT INTO bill_details
VALUES
(100018, 'john3', '2017-06-07 22:00:21.996182', 700.00, 0),
(100019, 'john3', '2017-06-07 22:00:21.996182', 350.11, 0),
(100020, 'john3', '2017-06-07 22:00:21.996182', 700.00, 0),
(100021, 'stev33', '2016-06-07 22:00:21.996182', 1000.5, 0),
(100022, 'stev33', '2016-06-07 22:00:21.996182', 1200.5, 0);

INSERT INTO payment_details
VALUES
(654654, 100001, 'jose1', '2016-06-14 22:00:21.996182', 300.10, 0),
(465654, 100003, 'jane22', '2017-06-014 22:00:21.996182', 410.10, 0),
(845161, 100004, 'stev33', '2016-06-14 22:00:21.996182', 500.10, 0),
(644651, 100005, 'samuel33', '2016-06-14 22:00:21.996182', 255.6, 1),
(654156, 100006, 'raul889', '2016-12-14 22:00:21.996182', 220.5, 0),
(655854, 100010, 'jose1', '2017-06-14 22:00:21.996182', 350.10, 0),
(465974, 100012, 'jane22', '2017-06-014 22:00:21.996182', 410.10, 0),
(846461, 100013, 'stev33', '2017-06-14 22:00:21.996182', 500.10, 0),
(874651, 100014, 'samuel33', '2017-06-14 22:00:21.996182', 255.6, 1),
(696156, 100015, 'raul889', '2017-12-14 22:00:21.996182', 700.25, 0);

INSERT INTO payment_details
VALUES
(651651, 100017, 'john3', '2017-12-07 22:00:21.996182', 700.00, 0);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment