Last active
July 21, 2016 14:01
-
-
Save milimetric/004a8d05907605e4d3fe1c8cfacc9cea to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- join oldTitle, oldNamespace, timestamp to title, namespace, end and vice versa, should match perfectly | |
select sum(if(log_title is null, 1, 0)) as history_without_log_line, | |
sum(if(page_title is null, 1, 0)) as log_line_without_history | |
from mforns.simplewiki_page_history | |
FULL OUTER JOIN | |
milimetric.simplewiki_logging ON cast(log_title as string) = page_title | |
AND log_namespace = page_namespace | |
AND cast(log_timestamp as string) = `end` | |
AND `end` is not null | |
; | |
-- returns 0, 0 | |
-- group by title, namespace, end, should never be duplicated | |
select page_title, | |
page_namespace, | |
`end`, | |
count(*) | |
from mforns.simplewiki_page_history | |
group by | |
page_title, | |
page_namespace, | |
`end` | |
having count(*) > 1 | |
; | |
-- returns 24100 rows | |
-- join pageId, end is null, page_create_timestamp to page_id, rev_timestamp for rev_parent_id = 0 and vice versa, ditto | |
select sum(if(page_id is null, 1, 0)) as created_page_without_history, | |
sum(if(rev_page is null, 1, 0)) as history_without_created_page | |
from mforns.simplewiki_page_history | |
FULL OUTER JOIN | |
milimetric.simplewiki_revision ON page_id = rev_page | |
AND page_creation_timestamp = cast(rev_timestamp as string) | |
AND rev_parent_id = 0 | |
AND `end` is null | |
AND page_creation_timestamp is not null | |
AND page_id is not null | |
; | |
-- returns 4866731, 326601 | |
-- simpler way to check some of the above | |
select p.page_id | |
from milimetric.simplewiki_page p | |
left join | |
mforns.simplewiki_page_history h ON h.page_id = p.page_id | |
where h.page_id is null | |
; | |
-- returns 0 rows | |
select h.page_id | |
from mforns.simplewiki_page_history h | |
left join | |
milimetric.simplewiki_page p ON p.page_id = h.page_id | |
where h.page_id is not null | |
and p.page_id is null | |
; | |
-- returns 0 rows | |
select h.page_id, | |
h.page_creation_timestamp | |
from (select distinct page_id, page_creation_timestamp | |
from mforns.simplewiki_page_history | |
where page_id is not null | |
and `end` is null | |
) h | |
left join | |
milimetric.simplewiki_page p ON p.page_id = h.page_id | |
where p.page_id is null | |
; | |
-- returns 0 rows | |
-- general probing query | |
select coalesce(page_creation_timestamp, ' none ') as creation, | |
coalesce(start, ' none ') as start_time, | |
coalesce(`end`, ' none ') as end_time, | |
caused_by_event_type as event, | |
page_id, | |
page_namespace as ns, | |
page_title | |
from mforns.simplewiki_page_history | |
where page_title = 'Main_Page' | |
and page_namespace = 0 | |
order by | |
page_id, | |
start_time | |
limit 1000 | |
; | |
select coalesce(page_creation_timestamp, ' none ') as creation, | |
coalesce(start, ' none ') as start_time, | |
coalesce(`end`, ' none ') as end_time, | |
caused_by_event_type as event, | |
page_id, | |
page_namespace as ns, | |
page_title | |
from mforns.simplewiki_page_history | |
where page_title = 'Xbox' | |
and page_namespace = 1 | |
order by | |
page_id, | |
start_time | |
limit 1000 | |
; | |
select coalesce(page_creation_timestamp, ' none ') as creation, | |
coalesce(start, ' none ') as start_time, | |
coalesce(`end`, ' none ') as end_time, | |
caused_by_event_type as event, | |
page_id, | |
page_namespace as ns, | |
page_title, | |
page_id_artificial | |
from mforns.simplewiki_page_history | |
where page_id = 232335 | |
order by | |
page_id, | |
start_time | |
limit 1000 | |
; | |
select coalesce(page_creation_timestamp, ' none ') as creation, | |
coalesce(start, ' none ') as start_time, | |
coalesce(`end`, ' none ') as end_time, | |
caused_by_event_type as event, | |
page_id, | |
page_namespace as ns, | |
page_title, | |
page_id_artificial | |
from mforns.simplewiki_page_history | |
where page_id = 26975 | |
order by | |
page_id, | |
start_time | |
limit 1000 | |
; | |
select coalesce(page_creation_timestamp, ' none ') as creation, | |
coalesce(start, ' none ') as start_time, | |
coalesce(`end`, ' none ') as end_time, | |
caused_by_event_type as event, | |
page_id, | |
page_namespace as ns, | |
page_title, | |
page_id_artificial | |
from mforns.simplewiki_page_history | |
where page_id = 459354 | |
order by | |
page_id, | |
start_time | |
limit 1000 | |
; | |
select * | |
from logging | |
where log_type = 'delete' and log_action = 'delete' | |
and log_namespace = 0 | |
and ( | |
cast(log_title as char(100)) COLLATE utf8_general_ci in ('Main_Page', 'Main_Page/Prerelease') | |
or rtrim(replace(cast(log_params as char(1000)), '\n', '')) COLLATE utf8_general_ci in ('Main Page', 'Main_Page/Prerelease') | |
) | |
; | |
select count(*) | |
from mforns.simplewiki_page_history h | |
inner join | |
milimetric.simplewiki_revision r on r.rev_page = h.page_id | |
and cast(r.rev_timestamp as string) = h.page_creation_timestamp | |
and r.rev_parent_id = 0 | |
and h.caused_by_event_type = 'create' | |
; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment