Last active
April 7, 2024 00:35
-
-
Save olejorgenb/9418bef65c65cd1f489557cfc08dde96 to your computer and use it in GitHub Desktop.
Firefox places.sqlite exploration (browser history)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- Firefox places.sqlite exploration | |
-- https://developer.mozilla.org/en-US/docs/Mozilla/Tech/Places/Database | |
-- https://wiki.mozilla.org/images/d/d5/Places.sqlite.schema3.pdf | |
-- http://forensicswiki.org/wiki/Mozilla_Firefox_3_History_File_Format (probably somewhat outdated) | |
-- [select text -> right click -> search] does not set from_visit :( | |
-- Gotchas :angry-face: https://superuser.com/a/1405880/153095 (Explains why your history is incomplete) | |
--- Magic numbers | |
-- moz_historyvisits.visit_type: | |
-- https://dxr.mozilla.org/mozilla-esr60/source/toolkit/components/places/nsINavHistoryService.idl#1185 | |
-- 1 TRANSITION_LINK The user followed a link and got a new toplevel window. | |
-- 2 TRANSITION_TYPED The user typed the page's URL in the URL bar or selected it from URL bar autocomplete results, clicked on it from a history query (from the History sidebar, History menu, or history query in the personal toolbar or Places organizer. | |
-- 3 TRANSITION_BOOKMARK The user followed a bookmark to get to the page. | |
-- 4 TRANSITION_EMBED Set when some inner content is loaded. This is true of all images on a page, and the contents of the iframe. It is also true of any content in a frame, regardless of whether or not the user clicked something to get there. | |
-- 5 TRANSITION_REDIRECT_PERMANENT The transition was a permanent redirect. | |
-- 6 TRANSITION_REDIRECT_TEMPORARY The transition was a temporary redirect. | |
-- 7 TRANSITION_DOWNLOAD The transition is a download. | |
-- 8 TRANSITION_FRAMED_LINK The user followed a link and got a visit in a frame. | |
-- 9 TRANSITION_RELOAD The page has been reloaded. | |
-- Cookies | |
select value from moz_cookies c | |
where c.name == "sessionToken" and c.host like "%.riddles.io" | |
-- Most visited hosts (the host name is reversed :D) | |
select p.rev_host, count(*) | |
from moz_places p | |
join moz_historyvisits v on v.place_id = p.id | |
group by p.rev_host order by count(*) desc; | |
-- Most visted places | |
select * from moz_places order by visit_count desc; | |
-- First ever visit | |
select date(min(visit_date)/1000000 , 'unixepoch') from moz_historyvisits; | |
-- Misc stats | |
select | |
((select count(*) from moz_places)+(select count(*) from moz_historyvisits)) as total, | |
(select count(*) from moz_places) as places, | |
(select count(*) from moz_historyvisits) as visits, | |
(select count(*) from moz_inputhistory) as inputs, | |
(select count(distinct rev_host) from moz_places) as hosts | |
; | |
-- Visits grouped by type | |
select visit_type, count(1) from moz_historyvisits group by visit_type; | |
-- Visits without a source (grouped by type) | |
select visit_type, count(*) from moz_historyvisits where from_visit = 0 group by visit_type; | |
-- Urls visited from a google search (or.. a google domain) | |
select count(pb.id) | |
from moz_historyvisits a | |
join moz_historyvisits b on a.id = b.from_visit | |
join moz_places pa on a.place_id = pa.id | |
join moz_places pb on b.place_id = pb.id | |
where pa.url like "%google.com%" and pb.url not like "%google%" | |
; | |
-- Approx google searches | |
select count(*) from moz_places where url like "%google%/search?%"; | |
-- Approx duckduckgo searches | |
select count(*) from moz_places where url like "%duckduckgo%/?q=%"; | |
-- Visits to a url (from_visit == 0 (probably) means the from_vist has been deleted | |
select datetime(hv.visit_date/1000000 , 'unixepoch') as visit_date, hv.from_visit | |
from moz_historyvisits hv join moz_places p on p.id = hv.place_id | |
where p.url like "$URL%" | |
; | |
-- Number of outgoing visits from a single history vist (NB: not counting redirects..) | |
select p.url, a.id, sum(b.visit_type not in (5,6)) as children | |
from moz_historyvisits a join moz_historyvisits b on a.id = b.from_visit | |
join moz_places p on p.id = a.place_id | |
group by a.id, p.id | |
order by children desc | |
; | |
-- Number of outgoing visits from a place (NB: not counting redirects..) | |
select p.url, | |
sum(b.visit_type not in (5,6)) as children, | |
sum(b.visit_type in (5,6)) as redirects | |
from moz_historyvisits a join moz_historyvisits b on a.id = b.from_visit | |
join moz_places p on p.id = a.place_id | |
group by p.id | |
order by children desc | |
; | |
-- Where did I find this url? (No matches means typed in or external source) | |
select from_place.id, from_place.url | |
from | |
moz_places as this_place, | |
moz_historyvisits as hv, | |
moz_historyvisits as hv_from, | |
moz_places as from_place | |
where | |
this_place.url like "%$URL%" and | |
this_place.id = hv.place_id and | |
hv.from_visit = hv_from.id and | |
hv_from.place_id = from_place.id | |
-- and from_place.rev_host <> this_place.rev_host -- optional filter | |
; | |
-- Dangling visits.. | |
select count(1) from moz_historyvisits a left join moz_historyvisits b on a.from_visit = b.id | |
where b.id is null and a.from_visit > 0 | |
; | |
-- Find all places visited less than N "hoops" away from a host | |
with recursive | |
host_visits(id) as | |
(select hv.id | |
from moz_places p | |
join moz_historyvisits hv on hv.place_id = p.id | |
where p.rev_host like "moc.rotanibmocy.swen%") | |
, | |
host_reachable(id, dist) as | |
(select *, 0 as dist from host_visits | |
union | |
select hv.id, (case when hv.visit_type in (5,6) then rr.dist else rr.dist+1 end) as dist | |
from host_reachable rr join moz_historyvisits hv on rr.id = hv.from_visit) | |
select | |
p.url, p.title, | |
datetime(max(hv.visit_date)/1000000 , 'unixepoch') as latest_visit_date, | |
count(hv.id), min(rr.dist) | |
from host_reachable rr | |
join moz_historyvisits hv on hv.id = rr.id | |
join moz_places p on p.id = hv.place_id | |
where | |
p.rev_host not like "moc.rotanibmocy.swen%" | |
-- and hv.visit_type not in (5,6) | |
and rr.dist < 20 | |
group by p.url, p.title | |
order by latest_visit_date desc | |
; | |
-- Not sure what this is - long time ago | |
with recursive | |
leaf_path(leaf_id, id) as | |
(select leaf.id, leaf.id from moz_historyvisits leaf | |
left join moz_historyvisits child on child.from_visit = leaf.id | |
where child.id is null | |
union all | |
select lp.leaf_id, v.from_visit as id | |
from leaf_path lp join moz_historyvisits v on v.id = lp.id) | |
, | |
path_length(leaf_id, length) as | |
(select leaf_id, count(*) as length | |
from leaf_path lp join moz_historyvisits hv on hv.id = lp.id | |
where hv.visit_type not in (5, 6) -- ignore redirects | |
group by leaf_id) | |
, | |
root(leaf_id, root_id) as | |
(select leaf_id, min(id) as root_id from leaf_path) | |
, | |
distinct_hosts(leaf_id, count) as | |
(select leaf_id, count(distinct p.rev_host) as count | |
from leaf_path lp | |
join moz_historyvisits hv on hv.id = lp.id | |
join moz_places p on hv.place_id = p.id | |
where hv.visit_type not in (5, 6) -- ignore redirects | |
group by lp.leaf_id) | |
select lp.leaf_id, lp.id, hv.visit_type, p.url | |
from leaf_path lp | |
join moz_historyvisits hv on hv.id = lp.id | |
join moz_places p on p.id = hv.place_id | |
join path_length pl on pl.leaf_id = lp.leaf_id | |
join distinct_hosts dh on dh.leaf_id = lp.leaf_id | |
where dh.count > 6 and pl.length > 3 | |
order by pl.length desc, lp.leaf_id, lp.id; | |
-- lp.leaf_id = (select leaf_id from (select leaf_id from path_length order by length desc limit 1 offset 100)) | |
select * from moz_inputhistory order by length(input) desc; | |
select LENGTH(url), count(1) from moz_places group by LENGTH(url); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You can get almost all queries from daily Firefox urlbar usage from this source file: https://searchfox.org/mozilla-esr60/source/toolkit/components/places/nsNavBookmarks.cpp
Pretty cool if you want to port places.sqlite to be used globally across web browsers.