Skip to content

Instantly share code, notes, and snippets.

@olejorgenb
Last active April 7, 2024 00:35
Show Gist options
  • Save olejorgenb/9418bef65c65cd1f489557cfc08dde96 to your computer and use it in GitHub Desktop.
Save olejorgenb/9418bef65c65cd1f489557cfc08dde96 to your computer and use it in GitHub Desktop.
Firefox places.sqlite exploration (browser history)
--- Firefox places.sqlite exploration
-- https://developer.mozilla.org/en-US/docs/Mozilla/Tech/Places/Database
-- https://wiki.mozilla.org/images/d/d5/Places.sqlite.schema3.pdf
-- http://forensicswiki.org/wiki/Mozilla_Firefox_3_History_File_Format (probably somewhat outdated)
-- [select text -> right click -> search] does not set from_visit :(
-- Gotchas :angry-face: https://superuser.com/a/1405880/153095 (Explains why your history is incomplete)
--- Magic numbers
-- moz_historyvisits.visit_type:
-- https://dxr.mozilla.org/mozilla-esr60/source/toolkit/components/places/nsINavHistoryService.idl#1185
-- 1 TRANSITION_LINK The user followed a link and got a new toplevel window.
-- 2 TRANSITION_TYPED The user typed the page's URL in the URL bar or selected it from URL bar autocomplete results, clicked on it from a history query (from the History sidebar, History menu, or history query in the personal toolbar or Places organizer.
-- 3 TRANSITION_BOOKMARK The user followed a bookmark to get to the page.
-- 4 TRANSITION_EMBED Set when some inner content is loaded. This is true of all images on a page, and the contents of the iframe. It is also true of any content in a frame, regardless of whether or not the user clicked something to get there.
-- 5 TRANSITION_REDIRECT_PERMANENT The transition was a permanent redirect.
-- 6 TRANSITION_REDIRECT_TEMPORARY The transition was a temporary redirect.
-- 7 TRANSITION_DOWNLOAD The transition is a download.
-- 8 TRANSITION_FRAMED_LINK The user followed a link and got a visit in a frame.
-- 9 TRANSITION_RELOAD The page has been reloaded.
-- Cookies
select value from moz_cookies c
where c.name == "sessionToken" and c.host like "%.riddles.io"
-- Most visited hosts (the host name is reversed :D)
select p.rev_host, count(*)
from moz_places p
join moz_historyvisits v on v.place_id = p.id
group by p.rev_host order by count(*) desc;
-- Most visted places
select * from moz_places order by visit_count desc;
-- First ever visit
select date(min(visit_date)/1000000 , 'unixepoch') from moz_historyvisits;
-- Misc stats
select
((select count(*) from moz_places)+(select count(*) from moz_historyvisits)) as total,
(select count(*) from moz_places) as places,
(select count(*) from moz_historyvisits) as visits,
(select count(*) from moz_inputhistory) as inputs,
(select count(distinct rev_host) from moz_places) as hosts
;
-- Visits grouped by type
select visit_type, count(1) from moz_historyvisits group by visit_type;
-- Visits without a source (grouped by type)
select visit_type, count(*) from moz_historyvisits where from_visit = 0 group by visit_type;
-- Urls visited from a google search (or.. a google domain)
select count(pb.id)
from moz_historyvisits a
join moz_historyvisits b on a.id = b.from_visit
join moz_places pa on a.place_id = pa.id
join moz_places pb on b.place_id = pb.id
where pa.url like "%google.com%" and pb.url not like "%google%"
;
-- Approx google searches
select count(*) from moz_places where url like "%google%/search?%";
-- Approx duckduckgo searches
select count(*) from moz_places where url like "%duckduckgo%/?q=%";
-- Visits to a url (from_visit == 0 (probably) means the from_vist has been deleted
select datetime(hv.visit_date/1000000 , 'unixepoch') as visit_date, hv.from_visit
from moz_historyvisits hv join moz_places p on p.id = hv.place_id
where p.url like "$URL%"
;
-- Number of outgoing visits from a single history vist (NB: not counting redirects..)
select p.url, a.id, sum(b.visit_type not in (5,6)) as children
from moz_historyvisits a join moz_historyvisits b on a.id = b.from_visit
join moz_places p on p.id = a.place_id
group by a.id, p.id
order by children desc
;
-- Number of outgoing visits from a place (NB: not counting redirects..)
select p.url,
sum(b.visit_type not in (5,6)) as children,
sum(b.visit_type in (5,6)) as redirects
from moz_historyvisits a join moz_historyvisits b on a.id = b.from_visit
join moz_places p on p.id = a.place_id
group by p.id
order by children desc
;
-- Where did I find this url? (No matches means typed in or external source)
select from_place.id, from_place.url
from
moz_places as this_place,
moz_historyvisits as hv,
moz_historyvisits as hv_from,
moz_places as from_place
where
this_place.url like "%$URL%" and
this_place.id = hv.place_id and
hv.from_visit = hv_from.id and
hv_from.place_id = from_place.id
-- and from_place.rev_host <> this_place.rev_host -- optional filter
;
-- Dangling visits..
select count(1) from moz_historyvisits a left join moz_historyvisits b on a.from_visit = b.id
where b.id is null and a.from_visit > 0
;
-- Find all places visited less than N "hoops" away from a host
with recursive
host_visits(id) as
(select hv.id
from moz_places p
join moz_historyvisits hv on hv.place_id = p.id
where p.rev_host like "moc.rotanibmocy.swen%")
,
host_reachable(id, dist) as
(select *, 0 as dist from host_visits
union
select hv.id, (case when hv.visit_type in (5,6) then rr.dist else rr.dist+1 end) as dist
from host_reachable rr join moz_historyvisits hv on rr.id = hv.from_visit)
select
p.url, p.title,
datetime(max(hv.visit_date)/1000000 , 'unixepoch') as latest_visit_date,
count(hv.id), min(rr.dist)
from host_reachable rr
join moz_historyvisits hv on hv.id = rr.id
join moz_places p on p.id = hv.place_id
where
p.rev_host not like "moc.rotanibmocy.swen%"
-- and hv.visit_type not in (5,6)
and rr.dist < 20
group by p.url, p.title
order by latest_visit_date desc
;
-- Not sure what this is - long time ago
with recursive
leaf_path(leaf_id, id) as
(select leaf.id, leaf.id from moz_historyvisits leaf
left join moz_historyvisits child on child.from_visit = leaf.id
where child.id is null
union all
select lp.leaf_id, v.from_visit as id
from leaf_path lp join moz_historyvisits v on v.id = lp.id)
,
path_length(leaf_id, length) as
(select leaf_id, count(*) as length
from leaf_path lp join moz_historyvisits hv on hv.id = lp.id
where hv.visit_type not in (5, 6) -- ignore redirects
group by leaf_id)
,
root(leaf_id, root_id) as
(select leaf_id, min(id) as root_id from leaf_path)
,
distinct_hosts(leaf_id, count) as
(select leaf_id, count(distinct p.rev_host) as count
from leaf_path lp
join moz_historyvisits hv on hv.id = lp.id
join moz_places p on hv.place_id = p.id
where hv.visit_type not in (5, 6) -- ignore redirects
group by lp.leaf_id)
select lp.leaf_id, lp.id, hv.visit_type, p.url
from leaf_path lp
join moz_historyvisits hv on hv.id = lp.id
join moz_places p on p.id = hv.place_id
join path_length pl on pl.leaf_id = lp.leaf_id
join distinct_hosts dh on dh.leaf_id = lp.leaf_id
where dh.count > 6 and pl.length > 3
order by pl.length desc, lp.leaf_id, lp.id;
-- lp.leaf_id = (select leaf_id from (select leaf_id from path_length order by length desc limit 1 offset 100))
select * from moz_inputhistory order by length(input) desc;
select LENGTH(url), count(1) from moz_places group by LENGTH(url);
@cam8001
Copy link

cam8001 commented Mar 3, 2021

This was really helpful, thanks. If you want to reverse the host names, you can do the following:

  • Use the 'Most visited places' query above, exporting the result to CSV
  • Open the CSV in VS Code Rainbow CSV
  • Run a query like the following: select a1.split("").reverse().join(""), a2

I think you can just add arbitrary Javascript to RBQL, as I did above, which is hella handy.

@GunGunGun
Copy link

You can get almost all queries from daily Firefox urlbar usage from this source file: https://searchfox.org/mozilla-esr60/source/toolkit/components/places/nsNavBookmarks.cpp

Pretty cool if you want to port places.sqlite to be used globally across web browsers.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment