Created
June 26, 2014 10:38
-
-
Save stevelacey/4ab95ed51c5c2dfd2b4d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| select b.page_uri, x.*, a.body_words as previous_body_words, b.body_words, | |
| (case when a.body_words is null then b.body_words else (b.body_words - a.body_words) end) body_words_diff, | |
| (case when a.article_words is null then b.article_words else (b.article_words - a.article_words) end) article_words_diff, | |
| (case when a.page_or_article_words is null then b.page_or_article_words else (b.page_or_article_words - a.page_or_article_words) end) page_or_article_words_diff, | |
| (case when a.p_words is null then b.p_words else (b.p_words - a.p_words) end) p_words_diff, | |
| (case when a.p_count is null then b.p_count else (b.p_count - a.p_count) end) p_count_diff, | |
| (case when a.img_count is null then b.img_count else (b.img_count - a.img_count) end) img_count_diff | |
| from | |
| ( | |
| select | |
| (select crawl_id from changes where crawl_date < c.crawl_date order by crawl_date desc limit 1) previous_crawl_id, | |
| crawl_id, | |
| (select crawl_date from changes where crawl_date < c.crawl_date order by crawl_date desc limit 1) previous_crawl_date, | |
| crawl_date | |
| from changes c | |
| group by crawl_id, crawl_date | |
| order by max(crawl_date) | |
| ) x | |
| left join changes b on x.crawl_id = b.crawl_id | |
| left join changes a on b.page_uri = a.page_uri and x.previous_crawl_id = a.crawl_id | |
| where a.page_uri = 'x.com' | |
| order by x.previous_crawl_date |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment