Skip to content

Instantly share code, notes, and snippets.

@stevelacey
Created June 26, 2014 10:38
Show Gist options
  • Select an option

  • Save stevelacey/4ab95ed51c5c2dfd2b4d to your computer and use it in GitHub Desktop.

Select an option

Save stevelacey/4ab95ed51c5c2dfd2b4d to your computer and use it in GitHub Desktop.
select b.page_uri, x.*, a.body_words as previous_body_words, b.body_words,
(case when a.body_words is null then b.body_words else (b.body_words - a.body_words) end) body_words_diff,
(case when a.article_words is null then b.article_words else (b.article_words - a.article_words) end) article_words_diff,
(case when a.page_or_article_words is null then b.page_or_article_words else (b.page_or_article_words - a.page_or_article_words) end) page_or_article_words_diff,
(case when a.p_words is null then b.p_words else (b.p_words - a.p_words) end) p_words_diff,
(case when a.p_count is null then b.p_count else (b.p_count - a.p_count) end) p_count_diff,
(case when a.img_count is null then b.img_count else (b.img_count - a.img_count) end) img_count_diff
from
(
select
(select crawl_id from changes where crawl_date < c.crawl_date order by crawl_date desc limit 1) previous_crawl_id,
crawl_id,
(select crawl_date from changes where crawl_date < c.crawl_date order by crawl_date desc limit 1) previous_crawl_date,
crawl_date
from changes c
group by crawl_id, crawl_date
order by max(crawl_date)
) x
left join changes b on x.crawl_id = b.crawl_id
left join changes a on b.page_uri = a.page_uri and x.previous_crawl_id = a.crawl_id
where a.page_uri = 'x.com'
order by x.previous_crawl_date
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment