Last active
December 12, 2015 03:18
-
-
Save whitequark/4705506 to your computer and use it in GitHub Desktop.
NoSQL is for those who can't learn SQL.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert raw stats into hourly stats. | |
def self.squash!(before=Time.now) | |
transaction do | |
range = where(%{ | |
date_trunc('hour', collected_at) < date_trunc('hour', timestamp ?) | |
}, before) | |
from_id, to_id = range.minimum(:id), range.maximum('id') | |
return nil if [from_id, to_id].none? | |
# Requires PostgreSQL >= 8.2 | |
connection.execute(%Q{ | |
INSERT INTO hourly_stats (date, hour, | |
resource_type, resource_id, site_id, | |
kind, | |
utm_source_id, | |
delta, | |
count) | |
SELECT date_trunc('day', collected_at) AS date, | |
EXTRACT(HOUR FROM collected_at) AS hour, | |
resource_type, resource_id, site_id, | |
kind, | |
utm_source_id, | |
-- Compute delta. | |
COUNT(*) AS delta, | |
-- Compute a cumulative total for count. | |
SUM(COUNT(*)) OVER w + | |
-- Fetch a preceding cumulative total. | |
COALESCE( | |
( | |
SELECT count FROM hourly_stats hs | |
WHERE hs.resource_type = rs.resource_type AND | |
hs.resource_id = rs.resource_id AND | |
hs.site_id = rs.site_id AND | |
hs.kind = rs.kind AND | |
hs.utm_source_id | |
IS NOT DISTINCT FROM | |
rs.utm_source_id | |
ORDER BY date DESC, hour DESC | |
LIMIT 1 | |
), 0 | |
) AS count | |
FROM raw_stats rs | |
WHERE id >= #{from_id} AND id <= #{to_id} | |
GROUP BY hour, date, | |
resource_type, resource_id, site_id, | |
kind, | |
utm_source_id | |
WINDOW w AS (PARTITION BY | |
resource_type, resource_id, site_id, | |
kind, | |
utm_source_id | |
-- This should've been ORDER BY date, hour... | |
-- but postgres is too stupid. | |
ORDER BY date_trunc('day', collected_at) ASC, | |
EXTRACT(HOUR FROM collected_at) ASC | |
ROWS BETWEEN UNBOUNDED PRECEDING | |
AND CURRENT ROW) | |
}) | |
where('id >= :from AND id <= :to', from: from_id, to: to_id). | |
delete_all | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment