Skip to content

Instantly share code, notes, and snippets.

@joshbrooks
Last active January 10, 2020 14:06
Show Gist options
  • Save joshbrooks/e661630600c6cc2693dbcd8819b0cd41 to your computer and use it in GitHub Desktop.
Save joshbrooks/e661630600c6cc2693dbcd8819b0cd41 to your computer and use it in GitHub Desktop.
A SQL query to generate a table

Inputs

Takes 3 tables. One is a list of attr One is a list of road_code One is a list of surveys

Expected output:

| road | break_attr | start_chainage | end_chainage | value | date_surveyed | | A01 | carriageway_width | 0 | 121841.47184 | 5 | NULL | | A01 | carriageway_width | 121841.47184 | 181425.02225 | 4.3 | NULL | | A01 | carriageway_width | 181425.02225 | 202312.37767 | 4.5 | NULL | | A01 | carriageway_width | 202312.37767 | NULL | NULL | NULL | | A01 | funding_source | 0 | 62405.11742 | JICA | NULL | | A01 | funding_source | 62405.11742 | 181425.02225 | ADB | NULL | | A01 | funding_source | 181425.02225 | NULL | NULL | NULL | | A01 | municipality | 0 | 62405.11742 | 6 | NULL | | A01 | municipality | 62405.11742 | 181425.02225 | 3 | NULL | | A01 | municipality | 181425.02225 | 202312.37767 | 9 | NULL | | A01 | municipality | 202312.37767 | NULL | NULL | NULL | | A01 | number_lanes | 40000 | 50000 | 10 | 2019-12-09 00:00:00+00 | | A01 | number_lanes | 50000 | NULL | NULL | 2019-12-09 00:00:00+00 | | A01 | pavement_class | 0 | 202312.37767 | 1 | NULL | | A01 | pavement_class | 202312.37767 | NULL | NULL | NULL |

-- Joshua Brooks
-- [email protected]
-- This disaggregates Surveys, creating a flat table of
-- values (really value changes) at specific chainages.
WITH values_to_chart AS (
SELECT DISTINCT skeys(values) attr FROM assets_survey
-- Some useful clauses:
-- SELECT 'surface_condition' attr
),
roads_to_chart AS (
SELECT DISTINCT road AS road_code FROM assets_survey, assets_road WHERE
assets_survey.road = assets_road.road_code
-- Some useful WHERE clauses:
-- WHERE assets_road.road_type = 'NAT'
-- WHERE road IN ('A02')
),
-- Surveys which match the given values and roads
su AS (
SELECT assets_survey.*, attr FROM assets_survey, values_to_chart, roads_to_chart
WHERE values ? values_to_chart.attr
AND road = roads_to_chart.road_code
-- Here we may also filter by other attributes such as
-- WHERE date_surveyed > '2011-01-01'
),
-- Where these roads have a survey start or end point
breakpoints AS (
SELECT DISTINCT * FROM (
SELECT attr, road, chainage_start c FROM su
UNION SELECT attr, road, chainage_end c FROM su
) xxxx
),
-- Merge the breakpoints into the surveys when a breakpoint is contained by the survey
-- Rank the surveys at each breakpoint by its survey date
-- and whether (preferred) it is not the end point of a survey
-- Where the end point is the only one, it means there is a gap between surveys.
-- Still need those end points sometimes when there are gaps
merge_breakpoints AS (
SELECT bp.attr AS break_attr, bp.c, su.*,
bp.c = su.chainage_end AS isend,
RANK() OVER (PARTITION BY bp.road, bp.c, bp.attr ORDER BY
CASE
WHEN bp.c = su.chainage_end THEN 1 ELSE 0
END,
date_surveyed DESC
-- Prefer when we are not at the end chainage of survey.
-- dateAvoids NULL values when surveys add the same value, same date.
)
FROM breakpoints bp, su
WHERE bp.road = su.road
AND bp.attr = su.attr
AND bp.c >= su.chainage_start
AND bp.c <= su.chainage_end
AND su.chainage_start != su.chainage_end
),
results AS (
SELECT rank, road, c, break_attr,
-- If the survey is actually the end value we NULLify the value
-- rather than using the attribute
CASE WHEN NOT isend THEN values -> break_attr ELSE NULL END as value,
values,
date_surveyed
FROM merge_breakpoints WHERE
rank = 1
ORDER BY road, c
),
-- Filters out situations where the value does not actually change between surveys
with_unchanged AS (
SELECT *,
rank() over (PARTITION BY road, break_attr, value, date_surveyed ORDER BY c) AS filtered FROM results
),
with_lead_values AS (
SELECT
road,
break_attr,
c as start_chainage,
-- Pick the previous end point
lead(c) over (PARTITION BY road, break_attr ORDER BY c) AS end_chainage,
value,
date_surveyed
FROM with_unchanged WHERE filtered = 1
) SELECT * FROM with_lead_values;
-- Or, here's an aggregate value
-- SELECT value, SUM(end_chainage - start_chainage) FROM with_lead_values
-- WHERE end_chainage IS NOT NULL GROUP BY value
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment