Skip to content

Instantly share code, notes, and snippets.

@frafra
Created April 9, 2026 12:28
Show Gist options
  • Select an option

  • Save frafra/b9df1e46b9e7d4134814bc167eaa8185 to your computer and use it in GitHub Desktop.

Select an option

Save frafra/b9df1e46b9e7d4134814bc167eaa8185 to your computer and use it in GitHub Desktop.
GBIF backbone FTS with DuckDB
attach database 'backbone.db' as backbone (read_only);
use backbone;
select taxonID, canonicalName, scientificName, fts_main_taxon.match_bm25(taxonID, 'gulo gulo') as score
from taxon
where score is not null and taxonomicStatus = 'accepted'
order by score desc;
select taxonID, canonicalName, taxonomicStatus, vernacularName, fts_main_vernacular_name_no.match_bm25(vernacularID, 'fjellrev') as score
from vernacular_name_no
join taxon using (taxonID)
where score is not null
order by score desc;
INSTALL zipfs FROM community; LOAD zipfs;
INSTALL cache_httpfs FROM community; LOAD cache_httpfs;
CREATE TABLE taxon AS
FROM 'zip://https://ipt.artsdatabanken.no/archive.do?r=artsnavnebase/Taxon.tsv'
;
CREATE INDEX taxon_taxonid ON taxon (taxonID);
PRAGMA create_fts_index(
'taxon', 'taxonID', 'canonicalName'
);
CREATE TABLE vernacular_name AS
SELECT DISTINCT CONCAT_WS('|', taxonID, language, vernacularName) AS vernacularID, *
FROM 'zip://https://hosted-datasets.gbif.org/datasets/backbone/current/backbone.zip/VernacularName.tsv'
;
CREATE TABLE vernacular_name_no AS
FROM vernacular_name
WHERE language = 'no'
;
CREATE INDEX vernacular_name_taxonid ON vernacular_name (taxonID);
CREATE INDEX vernacular_name_language ON vernacular_name (language);
PRAGMA create_fts_index(
'vernacular_name_no', 'vernacularID', 'vernacularName', stemmer='norwegian'
);
DROP TABLE vernacular_name_no;
CREATE VIEW vernacular_name_no AS
FROM vernacular_name
WHERE language = 'no'
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment