Last active
October 3, 2019 12:48
-
-
Save Skitionek/6042ba19d525dc41b839e939e085a23f to your computer and use it in GitHub Desktop.
Linear interpolation of missing data points (sparse dataset)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 60 * 60 * 24 = 86400 // number of seconds in day | |
WITH range AS (SELECT *, | |
EXTRACT(EPOCH FROM min) min_e, | |
EXTRACT(EPOCH FROM max) max_e | |
FROM | |
(SELECT min(date) AS min, max(date) AS max FROM prices) s) | |
SELECT | |
CASE WHEN prev_d <> min_e AND date <> g | |
THEN ROUND((m * g + ((price + prev_p) - m * (date + prev_d)) / 2)::NUMERIC, 2) | |
ELSE price END price, | |
date <> g interpolated, | |
TIMESTAMP WITHOUT TIME ZONE 'epoch' + g * INTERVAL '1 second' date | |
FROM | |
(SELECT *, | |
generate_series(prev_d + 86400, date, 86400) g | |
FROM | |
(SELECT *, | |
(price - prev_p) / (date - prev_d) m | |
FROM | |
(SELECT | |
id, | |
range.min_e, | |
range.max_e, | |
EXTRACT(EPOCH FROM | |
(lag(date, 1, range.min) OVER (PARTITION BY entity_id ORDER BY date)))::BIGINT prev_d, | |
EXTRACT(EPOCH FROM date)::BIGINT date, | |
(lag(price, 1, price) OVER (PARTITION BY entity_id ORDER BY date)) prev_p, | |
price | |
FROM | |
prices, | |
range) a | |
where date <> prev_d) b) c; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment